kernel-xen-2.6.32.11-2mdv2010.1.src.rpm

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.32
Patch-mainline: 2.6.32

 This patch contains the differences between 2.6.31 and 2.6.32.

Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches.py

--- sle11sp1-2010-03-29.orig/arch/x86/ia32/ia32entry-xen.S	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/ia32/ia32entry-xen.S	2009-11-06 14:53:39.000000000 +0100
@@ -20,18 +20,15 @@
 #define AUDIT_ARCH_I386		(EM_386|__AUDIT_ARCH_LE)
 #define __AUDIT_ARCH_LE	   0x40000000
 
-#ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit int_ret_from_sys_call
-#define sysretl_audit int_ret_from_sys_call
-#endif
-
 #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
 
 	.macro IA32_ARG_FIXUP noebp=0
 	movl	%edi,%r8d
 	.if \noebp
+	jmp	ia32_common
 	.else
 	movl	%ebp,%r9d
+ia32_common:
 	.endif
 	xchg	%ecx,%esi
 	movl	%ebx,%edi
@@ -39,12 +36,12 @@
 	.endm 
 
 	/* clobbers %eax */	
-	.macro  CLEAR_RREGS _r9=rax
+	.macro  CLEAR_RREGS offset=0, _r9=rax
 	xorl 	%eax,%eax
-	movq	%rax,R11(%rsp)
-	movq	%rax,R10(%rsp)
-	movq	%\_r9,R9(%rsp)
-	movq	%rax,R8(%rsp)
+	movq	%rax,\offset+R11(%rsp)
+	movq	%rax,\offset+R10(%rsp)
+	movq	%\_r9,\offset+R9(%rsp)
+	movq	%rax,\offset+R8(%rsp)
 	.endm
 
 	/*
@@ -144,17 +141,7 @@ ENTRY(ia32_sysenter_target)
 	jnz  sysenter_tracesys
 	cmpl	$(IA32_NR_syscalls-1),%eax
 	ja	ia32_badsys
-sysenter_do_call:
-	IA32_ARG_FIXUP
-sysenter_dispatch:
-	call	*ia32_sys_call_table(,%rax,8)
-	movq	%rax,RAX-ARGOFFSET(%rsp)
-	GET_THREAD_INFO(%r10)
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,TI_flags(%r10)
-	jnz	sysexit_audit
-	jmp int_ret_from_sys_call
+	jmp	ia32_do_call
 
 #ifdef CONFIG_AUDITSYSCALL
 	.macro auditsys_entry_common
@@ -175,31 +162,10 @@ sysenter_dispatch:
 	movl RDI-ARGOFFSET(%rsp),%r8d	/* reload 5th syscall arg */
 	.endm
 
-	.macro auditsys_exit exit,ebpsave=RBP
-	testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
-	jnz int_ret_from_sys_call
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	movl %eax,%esi		/* second arg, syscall return value */
-	cmpl $0,%eax		/* is it < 0? */
-	setl %al		/* 1 if so, 0 if not */
-	movzbl %al,%edi		/* zero-extend that into %edi */
-	inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
-	call audit_syscall_exit
-	movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
-	movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	jmp int_with_check
-	.endm
-
 sysenter_auditsys:
 	auditsys_entry_common
 	movl %ebp,%r9d			/* reload 6th syscall arg */
-	jmp sysenter_dispatch
-
-sysexit_audit:
-	auditsys_exit sysexit_from_sys_call
+	jmp ia32_dispatch
 #endif
 
 sysenter_tracesys:
@@ -216,7 +182,7 @@ sysenter_tracesys:
 	RESTORE_REST
 	cmpl	$(IA32_NR_syscalls-1),%eax
 	ja	int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
-	jmp	sysenter_do_call
+	jmp	ia32_do_call
 	CFI_ENDPROC
 ENDPROC(ia32_sysenter_target)
 
@@ -272,24 +238,13 @@ ENTRY(ia32_cstar_target)
 	ja  ia32_badsys
 cstar_do_call:
 	IA32_ARG_FIXUP 1
-cstar_dispatch:
-	call *ia32_sys_call_table(,%rax,8)
-	movq %rax,RAX-ARGOFFSET(%rsp)
-	GET_THREAD_INFO(%r10)
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
-	jnz sysretl_audit
-	jmp int_ret_from_sys_call
 	
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
 	movl %r9d,R9-ARGOFFSET(%rsp)	/* register to be clobbered by call */
 	auditsys_entry_common
 	movl R9-ARGOFFSET(%rsp),%r9d	/* reload 6th syscall arg */
-	jmp cstar_dispatch
-
-sysretl_audit:
-	auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */
+	jmp ia32_dispatch
 #endif
 
 cstar_tracesys:
@@ -299,7 +254,7 @@ cstar_tracesys:
 #endif
 	xchgl %r9d,%ebp
 	SAVE_REST
-	CLEAR_RREGS r9
+	CLEAR_RREGS 0, r9
 	movq $-ENOSYS,RAX(%rsp)	/* ptrace can change this for a bad syscall */
 	movq %rsp,%rdi        /* &pt_regs -> arg1 */
 	call syscall_trace_enter
@@ -367,9 +322,11 @@ ENTRY(ia32_syscall)
 	ja ia32_badsys
 ia32_do_call:
 	IA32_ARG_FIXUP
+ia32_dispatch:
 	call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
 	movq %rax,RAX-ARGOFFSET(%rsp)
+	CLEAR_RREGS -ARGOFFSET
 	jmp int_ret_from_sys_call 
 
 ia32_tracesys:			 
@@ -387,8 +344,8 @@ END(ia32_syscall)
 
 ia32_badsys:
 	movq $0,ORIG_RAX-ARGOFFSET(%rsp)
-	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
-	jmp int_ret_from_sys_call
+	movq $-ENOSYS,%rax
+	jmp ia32_sysret
 
 quiet_ni_syscall:
 	movq $-ENOSYS,%rax
@@ -482,7 +439,7 @@ ia32_sys_call_table:
 	.quad sys_mkdir
 	.quad sys_rmdir		/* 40 */
 	.quad sys_dup
-	.quad sys32_pipe
+	.quad sys_pipe
 	.quad compat_sys_times
 	.quad quiet_ni_syscall			/* old prof syscall holder */
 	.quad sys_brk		/* 45 */
@@ -776,5 +733,5 @@ ia32_sys_call_table:
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
-	.quad sys_perf_counter_open
+	.quad sys_perf_event_open
 ia32_syscall_end:
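
The auditsys_exit macro removed above computed the first argument of
audit_syscall_exit() from the syscall return value: the setl/inc pair maps a
non-negative return to 1 (AUDITSC_SUCCESS) and a negative one to 2
(AUDITSC_FAILURE), exactly as the inline comment says. A minimal C rendering
of that mapping, with the AUDITSC_* values taken from the asm comment:

#include <stdio.h>

enum { AUDITSC_SUCCESS = 1, AUDITSC_FAILURE = 2 };

static int audit_status(long ret)
{
	int failed = ret < 0;	/* setl %al: 1 if the return value is negative */

	return failed + 1;	/* inc: 0 -> AUDITSC_SUCCESS, 1 -> AUDITSC_FAILURE */
}

int main(void)
{
	printf("%d %d\n", audit_status(0), audit_status(-22));	/* prints: 1 2 */
	return 0;
}
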
--- sle11sp1-2010-03-29.orig/arch/x86/include/asm/time.h	2009-11-06 10:51:25.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/asm/time.h	2009-11-06 10:52:22.000000000 +0100
@@ -8,8 +8,9 @@ extern void hpet_time_init(void);
 extern void time_init(void);
 
 #ifdef CONFIG_XEN
+struct timespec;
 extern int xen_independent_wallclock(void);
-extern unsigned long xen_read_persistent_clock(void);
+extern void xen_read_persistent_clock(struct timespec *);
 extern int xen_update_persistent_clock(void);
 #endif
 
--- sle11sp1-2010-03-29.orig/arch/x86/include/asm/uv/uv_hub.h	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/arch/x86/include/asm/uv/uv_hub.h	2009-11-06 11:32:17.000000000 +0100
@@ -11,7 +11,7 @@
 #ifndef _ASM_X86_UV_UV_HUB_H
 #define _ASM_X86_UV_UV_HUB_H
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_UV
 #include <linux/numa.h>
 #include <linux/percpu.h>
 #include <linux/timer.h>
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/agp.h	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/agp.h	2009-11-06 10:52:22.000000000 +0100
@@ -28,10 +28,7 @@
  */
 #define flush_agp_cache() wbinvd()
 
-/* Convert a physical address to an address suitable for the GART. */
-#define phys_to_gart(x) phys_to_machine(x)
-#define gart_to_phys(x) machine_to_phys(x)
-#define page_to_gart(x) phys_to_gart(page_to_pseudophys(x))
+#define virt_to_gart virt_to_machine
 
 /* GATT allocation. Returns/accepts GATT kernel virtual address. */
 #define alloc_gatt_pages(order)	({                                          \
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/desc.h	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/desc.h	2009-11-18 14:54:16.000000000 +0100
@@ -312,7 +312,14 @@ static inline void load_LDT(mm_context_t
 
 static inline unsigned long get_desc_base(const struct desc_struct *desc)
 {
-	return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
+	return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
+{
+	desc->base0 = base & 0xffff;
+	desc->base1 = (base >> 16) & 0xff;
+	desc->base2 = (base >> 24) & 0xff;
 }
 
 static inline unsigned long get_desc_limit(const struct desc_struct *desc)
@@ -320,6 +327,12 @@ static inline unsigned long get_desc_lim
 	return desc->limit0 | (desc->limit << 16);
 }
 
+static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
+{
+	desc->limit0 = limit & 0xffff;
+	desc->limit = (limit >> 16) & 0xf;
+}
+
 #ifndef CONFIG_X86_NO_IDT
 static inline void _set_gate(int gate, unsigned type, void *addr,
 			     unsigned dpl, unsigned ist, unsigned seg)
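
The set_desc_base()/set_desc_limit() helpers added here scatter a linear value
across the split base and limit fields of the descriptor. Below is a
standalone sketch of the round trip; the bitfield layout is an assumption
mirroring the 2.6.32-era struct desc_struct (the real definition lives in
arch/x86/include/asm/desc_defs.h). The (unsigned) cast that the patch adds to
get_desc_base() keeps a set bit 31 from sign-extending into the upper half of
the 64-bit return value:

#include <assert.h>

struct demo_desc {
	unsigned short limit0;
	unsigned short base0;
	unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
	unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
};

static void demo_set_base(struct demo_desc *desc, unsigned long base)
{
	desc->base0 = base & 0xffff;		/* bits  0..15 */
	desc->base1 = (base >> 16) & 0xff;	/* bits 16..23 */
	desc->base2 = (base >> 24) & 0xff;	/* bits 24..31 */
}

static unsigned long demo_get_base(const struct demo_desc *desc)
{
	/* cast truncates to 32 bits before widening, as in the patch */
	return (unsigned)(desc->base0 | (desc->base1 << 16) |
			  (desc->base2 << 24));
}

int main(void)
{
	struct demo_desc d = { 0 };

	demo_set_base(&d, 0x12345678);
	assert(demo_get_base(&d) == 0x12345678);
	return 0;
}
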
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/dma-mapping.h	2009-11-06 10:51:47.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/dma-mapping.h	2009-11-06 10:52:22.000000000 +0100
@@ -1,11 +1,24 @@
 #ifndef _ASM_X86_DMA_MAPPING_H_
 
+#define phys_to_dma _phys_to_dma_
+#define dma_to_phys _dma_to_phys_
+
 #include_next <asm/dma-mapping.h>
 
-void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t);
+#undef phys_to_dma
+#undef dma_to_phys
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+	return phys_to_machine(paddr);
+}
 
-#define address_needs_mapping(hwdev, addr, size) \
-	!is_buffer_dma_capable(dma_get_mask(hwdev), addr, size)
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+	return machine_to_phys(daddr);
+}
+
+void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t);
 
 extern int range_straddles_page_boundary(paddr_t p, size_t size);
 
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/fixmap.h	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/fixmap.h	2009-11-06 10:52:22.000000000 +0100
@@ -139,6 +139,9 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
 	FIX_WP_TEST,
 #endif
+#ifdef CONFIG_INTEL_TXT
+	FIX_TBOOT_BASE,
+#endif
 	__end_of_fixed_addresses
 };
 
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/hypervisor.h	2009-11-23 10:43:12.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/hypervisor.h	2009-11-23 10:44:04.000000000 +0100
@@ -70,6 +70,7 @@ extern start_info_t *xen_start_info;
 #endif
 
 #define init_hypervisor(c) ((void)((c)->x86_hyper_vendor = X86_HYPER_VENDOR_XEN))
+#define init_hypervisor_platform() init_hypervisor(&boot_cpu_data)
 
 struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu);
 
@@ -351,6 +352,6 @@ MULTI_grant_table_op(multicall_entry_t *
 
 #endif
 
-#define uvm_multi(cpumask) ((unsigned long)cpus_addr(cpumask) | UVMF_MULTI)
+#define uvm_multi(cpumask) ((unsigned long)cpumask_bits(cpumask) | UVMF_MULTI)
 
 #endif /* __HYPERVISOR_H__ */
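
uvm_multi() packs a pointer and flags into a single unsigned long:
cpumask_bits() yields the address of the mask's bitmap, whose word alignment
leaves the low bits free to carry UVMF_* flags (callers OR in e.g.
UVMF_INVLPG, as the pgtable.h hunk further down shows). A toy reduction of
that tagged-pointer encoding; the flag value here is illustrative, not Xen's:

#include <assert.h>
#include <stdint.h>

#define TOY_FLAG 0x2UL	/* illustrative; see the Xen interface headers */

static unsigned long tag(const unsigned long *mask_bits)
{
	/* word alignment guarantees the low bits start out zero */
	assert(((uintptr_t)mask_bits & TOY_FLAG) == 0);
	return (uintptr_t)mask_bits | TOY_FLAG;
}

static const unsigned long *untag(unsigned long arg)
{
	return (const unsigned long *)(arg & ~TOY_FLAG);
}

int main(void)
{
	static unsigned long mask[1] = { 0xff };
	unsigned long arg = tag(mask);

	assert(untag(arg) == mask && (arg & TOY_FLAG));
	return 0;
}
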
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/irqflags.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/irqflags.h	2009-11-06 10:52:22.000000000 +0100
@@ -1,7 +1,7 @@
 #ifndef _X86_IRQFLAGS_H_
 #define _X86_IRQFLAGS_H_
 
-#include <asm/processor-flags.h>
+#include <asm/smp-processor-id.h>
 
 #ifndef __ASSEMBLY__
 /*
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/mmu_context.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/mmu_context.h	2009-11-06 10:52:22.000000000 +0100
@@ -88,12 +88,12 @@ static inline void switch_mm(struct mm_s
 		       !PagePinned(virt_to_page(next->pgd)));
 
 		/* stop flush ipis for the previous mm */
-		cpu_clear(cpu, prev->cpu_vm_mask);
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
 #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
 		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		percpu_write(cpu_tlbstate.active_mm, next);
 #endif
-		cpu_set(cpu, next->cpu_vm_mask);
+		cpumask_set_cpu(cpu, mm_cpumask(next));
 
 		/* Re-load page tables: load_cr3(next->pgd) */
 		op->cmd = MMUEXT_NEW_BASEPTR;
@@ -125,7 +125,7 @@ static inline void switch_mm(struct mm_s
 		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
 		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
 
-		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+		if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
 			/* We were in lazy tlb mode and leave_mm disabled
 			 * tlb flush IPI delivery. We must reload CR3
 			 * to make sure to use no freed page tables.
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/pci.h	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/pci.h	2009-11-06 10:52:22.000000000 +0100
@@ -151,7 +151,11 @@ static inline int __pcibus_to_node(const
 static inline const struct cpumask *
 cpumask_of_pcibus(const struct pci_bus *bus)
 {
-	return cpumask_of_node(__pcibus_to_node(bus));
+	int node;
+
+	node = __pcibus_to_node(bus);
+	return (node == -1) ? cpu_online_mask :
+			      cpumask_of_node(node);
 }
 #endif
 
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/pgtable.h	2009-11-20 11:17:56.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/pgtable.h	2009-11-20 11:18:13.000000000 +0100
@@ -53,16 +53,6 @@ extern struct list_head pgd_list;
 #define pte_update(mm, addr, ptep)              do { } while (0)
 #define pte_update_defer(mm, addr, ptep)        do { } while (0)
 
-static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
-{
-	xen_pagetable_setup_start(base);
-}
-
-static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
-{
-	xen_pagetable_setup_done(base);
-}
-
 #define pgd_val(x)	xen_pgd_val(x)
 #define __pgd(x)	xen_make_pgd(x)
 
@@ -134,6 +124,11 @@ static inline int pte_special(pte_t pte)
 
 #define pte_page(pte)	pfn_to_page(pte_pfn(pte))
 
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+	return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 static inline int pmd_large(pmd_t pte)
 {
 	return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -363,7 +358,7 @@ static inline unsigned long pmd_page_vad
  * this macro returns the index of the entry in the pmd page which would
  * control the given virtual address
  */
-static inline unsigned pmd_index(unsigned long address)
+static inline unsigned long pmd_index(unsigned long address)
 {
 	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
 }
@@ -383,7 +378,7 @@ static inline unsigned pmd_index(unsigne
  * this function returns the index of the entry in the pte page which would
  * control the given virtual address
  */
-static inline unsigned pte_index(unsigned long address)
+static inline unsigned long pte_index(unsigned long address)
 {
 	return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
 }
@@ -439,11 +434,6 @@ static inline pmd_t *pmd_offset(pud_t *p
 	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
 }
 
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
-	return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
-}
-
 static inline int pud_large(pud_t pud)
 {
 	return (__pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -479,7 +469,7 @@ static inline unsigned long pgd_page_vad
 #define pgd_page(pgd)		pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
 
 /* to find an entry in a page-table-directory. */
-static inline unsigned pud_index(unsigned long address)
+static inline unsigned long pud_index(unsigned long address)
 {
 	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
 }
@@ -600,7 +590,7 @@ extern int ptep_clear_flush_young(struct
 	if (!pte_none(__res) &&					\
 	    ((vma)->vm_mm != current->mm ||			\
 	     HYPERVISOR_update_va_mapping(addr,	__pte(0),	\
-			uvm_multi((vma)->vm_mm->cpu_vm_mask) |	\
+			uvm_multi(mm_cpumask((vma)->vm_mm)) |	\
 				UVMF_INVLPG))) {		\
 		__xen_pte_clear(__ptep);			\
 		flush_tlb_page(vma, addr);			\
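
pmd_index(), pte_index() and pud_index() above all follow one pattern: shift
the virtual address down to that level's granularity, then mask to the table
size. The patch widens their return type to unsigned long because callers
feed the result straight into 64-bit pointer arithmetic. A runnable sketch
using the x86-64 4-level paging constants:

#include <stdio.h>

#define PAGE_SHIFT	12
#define PTRS_PER_PTE	512
#define PMD_SHIFT	21
#define PTRS_PER_PMD	512
#define PUD_SHIFT	30
#define PTRS_PER_PUD	512

static unsigned long pte_index(unsigned long address)
{
	return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}

static unsigned long pmd_index(unsigned long address)
{
	return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}

static unsigned long pud_index(unsigned long address)
{
	return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
}

int main(void)
{
	unsigned long va = 0x7f1234567000UL;

	printf("pud=%lu pmd=%lu pte=%lu\n",
	       pud_index(va), pmd_index(va), pte_index(va));
	return 0;
}
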
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/pgtable_types.h	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/pgtable_types.h	2009-11-06 10:52:22.000000000 +0100
@@ -334,6 +334,7 @@ static inline pteval_t pte_flags(pte_t p
 typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
+extern void set_nx(void);
 extern int nx_enabled;
 
 #define pgprot_writecombine	pgprot_writecombine
@@ -354,14 +355,6 @@ int phys_mem_access_prot_allowed(struct 
 /* Install a pte for a particular vaddr in kernel space. */
 void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
-#ifndef CONFIG_XEN
-extern void native_pagetable_setup_start(pgd_t *base);
-extern void native_pagetable_setup_done(pgd_t *base);
-#else
-static inline void xen_pagetable_setup_start(pgd_t *base) {}
-static inline void xen_pagetable_setup_done(pgd_t *base) {}
-#endif
-
 struct seq_file;
 extern void arch_report_meminfo(struct seq_file *m);
 
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/processor.h	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/processor.h	2010-03-17 14:36:55.000000000 +0100
@@ -27,6 +27,7 @@ struct mm_struct;
 #include <linux/cpumask.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
+#include <linux/math64.h>
 #include <linux/init.h>
 #include <xen/interface/physdev.h>
 
@@ -411,7 +412,17 @@ extern unsigned long kernel_eflags;
 extern asmlinkage void ignore_sysret(void);
 #else	/* X86_64 */
 #ifdef CONFIG_CC_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, stack_canary);
+/*
+ * Make sure stack canary segment base is cached-aligned:
+ *   "For Intel Atom processors, avoid non zero segment base address
+ *    that is not aligned to cache line boundary at all cost."
+ * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
+ */
+struct stack_canary {
+	char __pad[20];		/* canary at %gs:20 */
+	unsigned long canary;
+};
+DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 #endif	/* X86_64 */
 
@@ -648,13 +659,23 @@ static inline void cpu_relax(void)
 	rep_nop();
 }
 
-/* Stop speculative execution: */
+/* Stop speculative execution and prefetching of modified code. */
 static inline void sync_core(void)
 {
 	int tmp;
 
-	asm volatile("cpuid" : "=a" (tmp) : "0" (1)
-		     : "ebx", "ecx", "edx", "memory");
+#if defined(CONFIG_M386) || defined(CONFIG_M486)
+	if (boot_cpu_data.x86 < 5)
+		/* There is no speculative execution.
+		 * jmp is a barrier to prefetching. */
+		asm volatile("jmp 1f\n1:\n" ::: "memory");
+	else
+#endif
+		/* cpuid is a barrier to speculative execution.
+		 * Prefetched instructions are automatically
+		 * invalidated when modified. */
+		asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+			     : "ebx", "ecx", "edx", "memory");
 }
 
 static inline void __monitor(const void *eax, unsigned long ecx,
@@ -945,4 +966,35 @@ extern void start_thread(struct pt_regs 
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
+extern int amd_get_nb_id(int cpu);
+
+struct aperfmperf {
+	u64 aperf, mperf;
+};
+
+static inline void get_aperfmperf(struct aperfmperf *am)
+{
+	WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
+
+	rdmsrl(MSR_IA32_APERF, am->aperf);
+	rdmsrl(MSR_IA32_MPERF, am->mperf);
+}
+
+#define APERFMPERF_SHIFT 10
+
+static inline
+unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
+				    struct aperfmperf *new)
+{
+	u64 aperf = new->aperf - old->aperf;
+	u64 mperf = new->mperf - old->mperf;
+	unsigned long ratio = aperf;
+
+	mperf >>= APERFMPERF_SHIFT;
+	if (mperf)
+		ratio = div64_u64(aperf, mperf);
+
+	return ratio;
+}
+
 #endif /* _ASM_X86_PROCESSOR_H */
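
calc_aperfmperf_ratio() above turns two APERF/MPERF snapshots into an
average-frequency figure: APERF advances at the actual clock and MPERF at the
fixed reference frequency, so delta-APERF over delta-MPERF is the fraction of
peak speed, returned scaled by 2^APERFMPERF_SHIFT. Pre-shifting MPERF keeps
the quotient in range while preserving about ten fractional bits. A userspace
rendering with fabricated counter values:

#include <stdio.h>
#include <stdint.h>

#define APERFMPERF_SHIFT 10

struct aperfmperf { uint64_t aperf, mperf; };

static unsigned long calc_ratio(const struct aperfmperf *old,
				const struct aperfmperf *new)
{
	uint64_t aperf = new->aperf - old->aperf;
	uint64_t mperf = new->mperf - old->mperf;
	unsigned long ratio = aperf;

	mperf >>= APERFMPERF_SHIFT;
	if (mperf)
		ratio = aperf / mperf;	/* div64_u64() in the kernel */
	return ratio;
}

int main(void)
{
	/* CPU ran at ~50% of reference frequency between the samples */
	struct aperfmperf t0 = { 0, 0 }, t1 = { 1000000, 2000000 };

	printf("%lu\n", calc_ratio(&t0, &t1));	/* ~0.5 << 10 = 512 */
	return 0;
}
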
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/setup.h	2009-11-06 10:52:22.000000000 +0100
@@ -0,0 +1,8 @@
+#ifndef __ASSEMBLY__
+
+void xen_start_kernel(void);
+void xen_arch_setup(void);
+
+#endif
+
+#include_next <asm/setup.h>
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/smp-processor-id.h	2009-11-06 10:52:22.000000000 +0100
@@ -0,0 +1,36 @@
+#ifndef _ASM_X86_SMP_PROCESSOR_ID_H
+#define _ASM_X86_SMP_PROCESSOR_ID_H
+
+#if defined(CONFIG_SMP) && !defined(__ASSEMBLY__)
+
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU(int, cpu_number);
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+#define raw_smp_processor_id() percpu_read(cpu_number)
+#define safe_smp_processor_id() smp_processor_id()
+
+#ifdef CONFIG_X86_64_SMP
+#define stack_smp_processor_id()					\
+({									\
+	struct thread_info *ti;						\
+	__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));	\
+	ti->cpu;							\
+})
+#endif
+
+#ifdef CONFIG_DEBUG_PREEMPT
+extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
+#else
+# define smp_processor_id() raw_smp_processor_id()
+#endif
+
+#endif /* SMP && !__ASSEMBLY__ */
+
+#endif /* _ASM_X86_SMP_PROCESSOR_ID_H */
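
stack_smp_processor_id() works because kernel stacks are THREAD_SIZE-aligned
blocks with struct thread_info at the base: masking %rsp with CURRENT_MASK,
i.e. ~(THREAD_SIZE - 1), recovers the thread_info without touching per-cpu
data, which matters very early in boot. A userspace model using an aligned
allocation in place of the kernel stack (the THREAD_SIZE value here is
illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define THREAD_SIZE	(16 * 1024)	/* illustrative stack size */
#define CURRENT_MASK	(~((uintptr_t)THREAD_SIZE - 1))

struct thread_info { int cpu; };

int main(void)
{
	char *stack = aligned_alloc(THREAD_SIZE, THREAD_SIZE);

	if (!stack)
		return 1;
	((struct thread_info *)stack)->cpu = 3;

	/* pretend this is %rsp, somewhere inside the stack */
	uintptr_t rsp = (uintptr_t)stack + THREAD_SIZE - 128;
	struct thread_info *ti = (struct thread_info *)(rsp & CURRENT_MASK);

	printf("cpu = %d\n", ti->cpu);	/* prints 3 */
	free(stack);
	return 0;
}
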
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/smp.h	2009-11-20 11:17:59.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/smp.h	2009-11-20 11:18:10.000000000 +0100
@@ -121,7 +121,6 @@ static inline void arch_send_call_functi
 	smp_ops.send_call_func_single_ipi(cpu);
 }
 
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
 static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
 	smp_ops.send_call_func_ipi(mask);
@@ -167,27 +166,7 @@ static inline int num_booting_cpus(void)
 
 extern unsigned disabled_cpus __cpuinitdata;
 
-#ifdef CONFIG_X86_32_SMP
-/*
- * This function is needed by all SMP systems. It must _always_ be valid
- * from the initial startup. We map APIC_BASE very early in page_setup(),
- * so this is correct in the x86 case.
- */
-#define raw_smp_processor_id() (percpu_read(cpu_number))
-#define safe_smp_processor_id() smp_processor_id()
-
-#elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() (percpu_read(cpu_number))
-
-#define stack_smp_processor_id()					\
-({								\
-	struct thread_info *ti;						\
-	__asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK));	\
-	ti->cpu;							\
-})
-#define safe_smp_processor_id()		smp_processor_id()
-
-#endif
+#include <asm/smp-processor-id.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/system.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/system.h	2009-11-06 10:52:22.000000000 +0100
@@ -30,7 +30,7 @@ void __switch_to_xtra(struct task_struct
 	"movl %P[task_canary](%[next]), %%ebx\n\t"			\
 	"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
 #define __switch_canary_oparam						\
-	, [stack_canary] "=m" (per_cpu_var(stack_canary))
+	, [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
 #define __switch_canary_iparam						\
 	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
 #else	/* CC_STACKPROTECTOR */
@@ -149,33 +149,6 @@ do {									\
 #endif
 
 #ifdef __KERNEL__
-#define _set_base(addr, base) do { unsigned long __pr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
-	"rorl $16,%%edx\n\t" \
-	"movb %%dl,%2\n\t" \
-	"movb %%dh,%3" \
-	:"=&d" (__pr) \
-	:"m" (*((addr)+2)), \
-	 "m" (*((addr)+4)), \
-	 "m" (*((addr)+7)), \
-	 "0" (base) \
-	); } while (0)
-
-#define _set_limit(addr, limit) do { unsigned long __lr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
-	"rorl $16,%%edx\n\t" \
-	"movb %2,%%dh\n\t" \
-	"andb $0xf0,%%dh\n\t" \
-	"orb %%dh,%%dl\n\t" \
-	"movb %%dl,%2" \
-	:"=&d" (__lr) \
-	:"m" (*(addr)), \
-	 "m" (*((addr)+6)), \
-	 "0" (limit) \
-	); } while (0)
-
-#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
-#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
 
 extern void xen_load_gs_index(unsigned);
 
--- sle11sp1-2010-03-29.orig/arch/x86/include/mach-xen/asm/tlbflush.h	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/include/mach-xen/asm/tlbflush.h	2009-11-06 10:52:22.000000000 +0100
@@ -74,9 +74,9 @@ static inline void reset_lazy_tlbstate(v
 #define local_flush_tlb() __flush_tlb()
 
 #define flush_tlb_all xen_tlb_flush_all
-#define flush_tlb_current_task() xen_tlb_flush_mask(&current->mm->cpu_vm_mask)
-#define flush_tlb_mm(mm) xen_tlb_flush_mask(&(mm)->cpu_vm_mask)
-#define flush_tlb_page(vma, va) xen_invlpg_mask(&(vma)->vm_mm->cpu_vm_mask, va)
+#define flush_tlb_current_task() xen_tlb_flush_mask(mm_cpumask(current->mm))
+#define flush_tlb_mm(mm) xen_tlb_flush_mask(mm_cpumask(mm))
+#define flush_tlb_page(vma, va) xen_invlpg_mask(mm_cpumask((vma)->vm_mm), va)
 
 #define flush_tlb()	flush_tlb_current_task()
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/Makefile	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/Makefile	2009-11-06 10:52:22.000000000 +0100
@@ -132,8 +132,6 @@ ifeq ($(CONFIG_X86_64),y)
 
 	obj-$(CONFIG_PCI_MMCONFIG)	+= mmconf-fam10h_64.o
 	obj-y				+= vsmp_64.o
-
-	time_64-$(CONFIG_XEN)		+= time_32.o
 endif
 
 disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/apic/io_apic-xen.c	2010-02-18 15:29:00.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/apic/io_apic-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -79,6 +79,8 @@ unsigned long io_apic_irqs;
 #endif /* CONFIG_XEN */
 
 #define __apicdebuginit(type) static type __init
+#define for_each_irq_pin(entry, head) \
+	for (entry = head; entry; entry = entry->next)
 
 /*
  *      Is the SiS APIC rmw bug present ?
@@ -100,12 +102,24 @@ int nr_ioapic_registers[MAX_IO_APICS];
 struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
+/* IO APIC gsi routing info */
+struct mp_ioapic_gsi  mp_gsi_routing[MAX_IO_APICS];
+
 /* MP IRQ source entries */
 struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+#ifndef CONFIG_XEN
+/* Number of legacy interrupts */
+static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
+/* GSI interrupts */
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
+#else
+#define nr_legacy_irqs NR_IRQS_LEGACY
+#endif
+
 #if defined (CONFIG_MCA) || defined (CONFIG_EISA)
 int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
@@ -132,15 +146,6 @@ static int __init parse_noapic(char *str
 early_param("noapic", parse_noapic);
 
 #ifndef CONFIG_XEN
-struct irq_pin_list;
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
 struct irq_pin_list {
 	int apic, pin;
 	struct irq_pin_list *next;
@@ -155,6 +160,11 @@ static struct irq_pin_list *get_one_free
 	return pin;
 }
 
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
 struct irq_cfg {
 	struct irq_pin_list *irq_2_pin;
 	cpumask_var_t domain;
@@ -188,6 +198,12 @@ static struct irq_cfg irq_cfgx[NR_IRQS] 
 	[15] = { .vector = IRQ15_VECTOR, },
 };
 
+void __init io_apic_disable_legacy(void)
+{
+	nr_legacy_irqs = 0;
+	nr_irqs_gsi = 0;
+}
+
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
@@ -205,7 +221,7 @@ int __init arch_early_irq_init(void)
 		desc->chip_data = &cfg[i];
 		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
 		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
-		if (i < NR_IRQS_LEGACY)
+		if (i < nr_legacy_irqs)
 			cpumask_setall(cfg[i].domain);
 	}
 
@@ -231,17 +247,14 @@ static struct irq_cfg *get_one_free_irq_
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
-		if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+		if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
 			kfree(cfg);
 			cfg = NULL;
-		} else if (!alloc_cpumask_var_node(&cfg->old_domain,
+		} else if (!zalloc_cpumask_var_node(&cfg->old_domain,
 							  GFP_ATOMIC, node)) {
 			free_cpumask_var(cfg->domain);
 			kfree(cfg);
 			cfg = NULL;
-		} else {
-			cpumask_clear(cfg->domain);
-			cpumask_clear(cfg->old_domain);
 		}
 	}
 
@@ -455,13 +468,10 @@ static bool io_apic_level_ack_pending(st
 	unsigned long flags;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	entry = cfg->irq_2_pin;
-	for (;;) {
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		unsigned int reg;
 		int pin;
 
-		if (!entry)
-			break;
 		pin = entry->pin;
 		reg = io_apic_read(entry->apic, 0x10 + pin*2);
 		/* Is the remote IRR bit set? */
@@ -469,9 +479,6 @@ static bool io_apic_level_ack_pending(st
 			spin_unlock_irqrestore(&ioapic_lock, flags);
 			return true;
 		}
-		if (!entry->next)
-			break;
-		entry = entry->next;
 	}
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 
@@ -543,72 +550,68 @@ static void ioapic_mask_entry(int apic, 
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+static int
+add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
 {
-	struct irq_pin_list *entry;
+	struct irq_pin_list **last, *entry;
 
-	entry = cfg->irq_2_pin;
-	if (!entry) {
-		entry = get_one_free_irq_2_pin(node);
-		if (!entry) {
-			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
-					apic, pin);
-			return;
-		}
-		cfg->irq_2_pin = entry;
-		entry->apic = apic;
-		entry->pin = pin;
-		return;
-	}
-
-	while (entry->next) {
-		/* not again, please */
+	/* don't allow duplicates */
+	last = &cfg->irq_2_pin;
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		if (entry->apic == apic && entry->pin == pin)
-			return;
-
-		entry = entry->next;
+			return 0;
+		last = &entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin(node);
-	entry = entry->next;
+	entry = get_one_free_irq_2_pin(node);
+	if (!entry) {
+		printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
+				node, apic, pin);
+		return -ENOMEM;
+	}
 	entry->apic = apic;
 	entry->pin = pin;
+
+	*last = entry;
+	return 0;
+}
+
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+	if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
+		panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
 }
 
 /*
  * Reroute an IRQ to a different pin.
  */
 static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
-				      int oldapic, int oldpin,
-				      int newapic, int newpin)
+					   int oldapic, int oldpin,
+					   int newapic, int newpin)
 {
-	struct irq_pin_list *entry = cfg->irq_2_pin;
-	int replaced = 0;
+	struct irq_pin_list *entry;
 
-	while (entry) {
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		if (entry->apic == oldapic && entry->pin == oldpin) {
 			entry->apic = newapic;
 			entry->pin = newpin;
-			replaced = 1;
 			/* every one is different, right? */
-			break;
+			return;
 		}
-		entry = entry->next;
 	}
 
-	/* why? call replace before add? */
-	if (!replaced)
-		add_pin_to_irq_node(cfg, node, newapic, newpin);
+	/* old apic/pin didn't exist, so just add new ones */
+	add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
-static inline void io_apic_modify_irq(struct irq_cfg *cfg,
-				int mask_and, int mask_or,
-				void (*final)(struct irq_pin_list *entry))
+static void io_apic_modify_irq(struct irq_cfg *cfg,
+			       int mask_and, int mask_or,
+			       void (*final)(struct irq_pin_list *entry))
 {
 	int pin;
 	struct irq_pin_list *entry;
 
-	for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		unsigned int reg;
 		pin = entry->pin;
 		reg = io_apic_read(entry->apic, 0x10 + pin * 2);
@@ -625,7 +628,6 @@ static void __unmask_IO_APIC_irq(struct 
 	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
 }
 
-#ifdef CONFIG_X86_64
 static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
@@ -641,11 +643,6 @@ static void __mask_IO_APIC_irq(struct ir
 {
 	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
 }
-#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
-{
-	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
-}
 
 static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
 {
@@ -658,7 +655,6 @@ static void __unmask_and_level_IO_APIC_i
 	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
 			IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
 }
-#endif /* CONFIG_X86_32 */
 
 static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
 {
@@ -719,6 +715,7 @@ static void clear_IO_APIC (void)
 }
 #else
 #define add_pin_to_irq_node(cfg, node, apic, pin)
+#define add_pin_to_irq_node_nopanic(cfg, node, apic, pin) 0
 #endif /* CONFIG_XEN */
 
 #ifdef CONFIG_X86_32
@@ -935,7 +932,7 @@ static int __init find_isa_irq_apic(int 
  */
 static int EISA_ELCR(unsigned int irq)
 {
-	if (irq < NR_IRQS_LEGACY) {
+	if (irq < nr_legacy_irqs) {
 		unsigned int port = 0x4d0 + (irq >> 3);
 		return (inb(port) >> (irq & 7)) & 1;
 	}
@@ -1547,7 +1544,7 @@ static void setup_IO_APIC_irq(int apic_i
 	}
 
 	ioapic_register_intr(irq, desc, trigger);
-	if (irq < NR_IRQS_LEGACY)
+	if (irq < nr_legacy_irqs)
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic_id, pin, entry);
@@ -1775,12 +1772,8 @@ __apicdebuginit(void) print_IO_APIC(void
 		if (!entry)
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
-		for (;;) {
+		for_each_irq_pin(entry, cfg->irq_2_pin)
 			printk("-> %d:%d", entry->apic, entry->pin);
-			if (!entry->next)
-				break;
-			entry = entry->next;
-		}
 		printk("\n");
 	}
 
@@ -1924,7 +1917,7 @@ __apicdebuginit(void) print_PIC(void)
 	unsigned int v;
 	unsigned long flags;
 
-	if (apic_verbosity == APIC_QUIET)
+	if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
 		return;
 
 	printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1956,7 +1949,7 @@ __apicdebuginit(int) print_all_ICs(void)
 	print_PIC();
 
 	/* don't print out if apic is not there */
-	if (!cpu_has_apic || disable_apic)
+	if (!cpu_has_apic && !apic_from_smp_config())
 		return 0;
 
 	print_all_local_APICs();
@@ -1990,6 +1983,10 @@ void __init enable_IO_APIC(void)
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
 	}
+
+	if (!nr_legacy_irqs)
+		return;
+
 #ifndef CONFIG_XEN
 	for(apic = 0; apic < nr_ioapics; apic++) {
 		int pin;
@@ -2049,6 +2046,9 @@ void disable_IO_APIC(void)
 	 */
 	clear_IO_APIC();
 
+	if (!nr_legacy_irqs)
+		return;
+
 	/*
 	 * If the i8259 is routed through an IOAPIC
 	 * Put that IOAPIC in virtual wire mode
@@ -2082,7 +2082,7 @@ void disable_IO_APIC(void)
 	/*
 	 * Use virtual wire A mode when interrupt remapping is enabled.
 	 */
-	if (cpu_has_apic)
+	if (cpu_has_apic || apic_from_smp_config())
 		disconnect_bsp_APIC(!intr_remapping_enabled &&
 				ioapic_i8259.pin != -1);
 }
@@ -2095,7 +2095,7 @@ void disable_IO_APIC(void)
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
 
-static void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc(void)
 {
 	union IO_APIC_reg_00 reg_00;
 	physid_mask_t phys_id_present_map;
@@ -2104,9 +2104,8 @@ static void __init setup_ioapic_ids_from
 	unsigned char old_id;
 	unsigned long flags;
 
-	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+	if (acpi_ioapic)
 		return;
-
 	/*
 	 * Don't check I/O APIC IDs for xAPIC systems.  They have
 	 * no meaning without the serial APIC bus.
@@ -2280,7 +2279,7 @@ static unsigned int startup_ioapic_irq(u
 	struct irq_cfg *cfg;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < NR_IRQS_LEGACY) {
+	if (irq < nr_legacy_irqs) {
 		disable_8259A_irq(irq);
 		if (i8259A_irq_pending(irq))
 			was_pending = 1;
@@ -2292,7 +2291,6 @@ static unsigned int startup_ioapic_irq(u
 	return was_pending;
 }
 
-#ifdef CONFIG_X86_64
 static int ioapic_retrigger_irq(unsigned int irq)
 {
 
@@ -2305,14 +2303,6 @@ static int ioapic_retrigger_irq(unsigned
 
 	return 1;
 }
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
-	apic->send_IPI_self(irq_cfg(irq)->vector);
-
-	return 1;
-}
-#endif
 
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
@@ -2350,13 +2340,9 @@ static void __target_IO_APIC_irq(unsigne
 	struct irq_pin_list *entry;
 	u8 vector = cfg->vector;
 
-	entry = cfg->irq_2_pin;
-	for (;;) {
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
 		unsigned int reg;
 
-		if (!entry)
-			break;
-
 		apic = entry->apic;
 		pin = entry->pin;
 		/*
@@ -2369,9 +2355,6 @@ static void __target_IO_APIC_irq(unsigne
 		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
 		reg |= vector;
 		io_apic_modify(apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
 	}
 }
 
@@ -2596,11 +2579,8 @@ atomic_t irq_mis_count;
 static void ack_apic_level(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
-
-#ifdef CONFIG_X86_32
 	unsigned long v;
 	int i;
-#endif
 	struct irq_cfg *cfg;
 	int do_unmask_irq = 0;
 
@@ -2613,31 +2593,28 @@ static void ack_apic_level(unsigned int 
 	}
 #endif
 
-#ifdef CONFIG_X86_32
 	/*
-	* It appears there is an erratum which affects at least version 0x11
-	* of I/O APIC (that's the 82093AA and cores integrated into various
-	* chipsets).  Under certain conditions a level-triggered interrupt is
-	* erroneously delivered as edge-triggered one but the respective IRR
-	* bit gets set nevertheless.  As a result the I/O unit expects an EOI
-	* message but it will never arrive and further interrupts are blocked
-	* from the source.  The exact reason is so far unknown, but the
-	* phenomenon was observed when two consecutive interrupt requests
-	* from a given source get delivered to the same CPU and the source is
-	* temporarily disabled in between.
-	*
-	* A workaround is to simulate an EOI message manually.  We achieve it
-	* by setting the trigger mode to edge and then to level when the edge
-	* trigger mode gets detected in the TMR of a local APIC for a
-	* level-triggered interrupt.  We mask the source for the time of the
-	* operation to prevent an edge-triggered interrupt escaping meanwhile.
-	* The idea is from Manfred Spraul.  --macro
-	*/
+	 * It appears there is an erratum which affects at least version 0x11
+	 * of I/O APIC (that's the 82093AA and cores integrated into various
+	 * chipsets).  Under certain conditions a level-triggered interrupt is
+	 * erroneously delivered as edge-triggered one but the respective IRR
+	 * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	 * message but it will never arrive and further interrupts are blocked
+	 * from the source.  The exact reason is so far unknown, but the
+	 * phenomenon was observed when two consecutive interrupt requests
+	 * from a given source get delivered to the same CPU and the source is
+	 * temporarily disabled in between.
+	 *
+	 * A workaround is to simulate an EOI message manually.  We achieve it
+	 * by setting the trigger mode to edge and then to level when the edge
+	 * trigger mode gets detected in the TMR of a local APIC for a
+	 * level-triggered interrupt.  We mask the source for the time of the
+	 * operation to prevent an edge-triggered interrupt escaping meanwhile.
+	 * The idea is from Manfred Spraul.  --macro
+	 */
 	cfg = desc->chip_data;
 	i = cfg->vector;
-
 	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-#endif
 
 	/*
 	 * We must acknowledge the irq before we move it or the acknowledge will
@@ -2679,7 +2656,7 @@ static void ack_apic_level(unsigned int 
 		unmask_IO_APIC_irq_desc(desc);
 	}
 
-#ifdef CONFIG_X86_32
+	/* Tail end of version 0x11 I/O APIC bug workaround */
 	if (!(v & (1 << (i & 0x1f)))) {
 		atomic_inc(&irq_mis_count);
 		spin_lock(&ioapic_lock);
@@ -2687,26 +2664,15 @@ static void ack_apic_level(unsigned int 
 		__unmask_and_level_IO_APIC_irq(cfg);
 		spin_unlock(&ioapic_lock);
 	}
-#endif
 }
 
 #ifdef CONFIG_INTR_REMAP
 static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
 {
-	int apic, pin;
 	struct irq_pin_list *entry;
 
-	entry = cfg->irq_2_pin;
-	for (;;) {
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		io_apic_eoi(apic, pin);
-		entry = entry->next;
-	}
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		io_apic_eoi(entry->apic, entry->pin);
 }
 
 static void
@@ -2796,7 +2762,7 @@ static inline void init_IO_APIC_traps(vo
 			 * so default to an old-fashioned 8259
 			 * interrupt if we can..
 			 */
-			if (irq < NR_IRQS_LEGACY)
+			if (irq < nr_legacy_irqs)
 				make_8259A_irq(irq);
 			else
 				/* Strange. Oh, well.. */
@@ -3136,7 +3102,7 @@ out:
  * the I/O APIC in all cases now.  No actual device should request
  * it anyway.  --macro
  */
-#define PIC_IRQS	(1 << PIC_CASCADE_IR)
+#define PIC_IRQS	(1UL << PIC_CASCADE_IR)
 
 void __init setup_IO_APIC(void)
 {
@@ -3148,23 +3114,21 @@ void __init setup_IO_APIC(void)
 	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
 	 */
 #endif
-
-	io_apic_irqs = ~PIC_IRQS;
+	io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
 	/*
          * Set up IO-APIC IRQ routing.
          */
 #ifndef CONFIG_XEN
-#ifdef CONFIG_X86_32
-	if (!acpi_ioapic)
-		setup_ioapic_ids_from_mpc();
-#endif
+	x86_init.mpparse.setup_ioapic_ids();
+
 	sync_Arb_IDs();
 #endif
 	setup_IO_APIC_irqs();
 	init_IO_APIC_traps();
-	check_timer();
+	if (nr_legacy_irqs)
+		check_timer();
 }
 
 /*
@@ -3274,7 +3238,6 @@ static int __init ioapic_init_sysfs(void
 
 device_initcall(ioapic_init_sysfs);
 
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
@@ -3346,8 +3309,7 @@ void destroy_irq(unsigned int irq)
 	cfg = desc->chip_data;
 	dynamic_irq_cleanup(irq);
 	/* connect back irq_cfg */
-	if (desc)
-		desc->chip_data = cfg;
+	desc->chip_data = cfg;
 
 	free_irte(irq);
 	spin_lock_irqsave(&vector_lock, flags);
@@ -4025,9 +3987,13 @@ static int __io_apic_set_pci_routing(str
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
-	if (irq >= NR_IRQS_LEGACY) {
+	if (irq >= nr_legacy_irqs) {
 		cfg = desc->chip_data;
-		add_pin_to_irq_node(cfg, node, ioapic, pin);
+		if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
+			printk(KERN_INFO "can not add pin %d for irq %d\n",
+				pin, irq);
+			return 0;
+		}
 	}
 
 	setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
@@ -4056,11 +4022,28 @@ int io_apic_set_pci_routing(struct devic
 	return __io_apic_set_pci_routing(dev, irq, irq_attr);
 }
 
-/* --------------------------------------------------------------------------
-                          ACPI-based IOAPIC Configuration
-   -------------------------------------------------------------------------- */
+u8 __init io_apic_unique_id(u8 id)
+{
+#ifdef CONFIG_X86_32
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return io_apic_get_unique_id(nr_ioapics, id);
+	else
+		return id;
+#else
+	int i;
+	DECLARE_BITMAP(used, 256);
 
-#ifdef CONFIG_ACPI
+	bitmap_zero(used, 256);
+	for (i = 0; i < nr_ioapics; i++) {
+		struct mpc_ioapic *ia = &mp_ioapics[i];
+		__set_bit(ia->apicid, used);
+	}
+	if (!test_bit(id, used))
+		return id;
+	return find_first_zero_bit(used, 256);
+#endif
+}
 
 #ifdef CONFIG_X86_32
 int __init io_apic_get_unique_id(int ioapic, int apic_id)
@@ -4171,8 +4154,6 @@ int acpi_get_override_irq(int bus_irq, i
 	return 0;
 }
 
-#endif /* CONFIG_ACPI */
-
 #ifndef CONFIG_XEN
 /*
  * This function currently is only a helper for the i386 smp boot process where
@@ -4227,7 +4208,7 @@ void __init setup_ioapic_dest(void)
 
 static struct resource *ioapic_resources;
 
-static struct resource * __init ioapic_setup_resources(void)
+static struct resource * __init ioapic_setup_resources(int nr_ioapics)
 {
 	unsigned long n;
 	struct resource *res;
@@ -4243,15 +4224,13 @@ static struct resource * __init ioapic_s
 	mem = alloc_bootmem(n);
 	res = (void *)mem;
 
-	if (mem != NULL) {
-		mem += sizeof(struct resource) * nr_ioapics;
+	mem += sizeof(struct resource) * nr_ioapics;
 
-		for (i = 0; i < nr_ioapics; i++) {
-			res[i].name = mem;
-			res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-			sprintf(mem,  "IOAPIC %u", i);
-			mem += IOAPIC_RESOURCE_NAME_SIZE;
-		}
+	for (i = 0; i < nr_ioapics; i++) {
+		res[i].name = mem;
+		res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		sprintf(mem,  "IOAPIC %u", i);
+		mem += IOAPIC_RESOURCE_NAME_SIZE;
 	}
 
 	ioapic_resources = res;
@@ -4265,7 +4244,7 @@ void __init ioapic_init_mappings(void)
 	struct resource *ioapic_res;
 	int i;
 
-	ioapic_res = ioapic_setup_resources();
+	ioapic_res = ioapic_setup_resources(nr_ioapics);
 	for (i = 0; i < nr_ioapics; i++) {
 		if (smp_found_config) {
 			ioapic_phys = mp_ioapics[i].apicaddr;
@@ -4294,11 +4273,9 @@ fake_ioapic_page:
 			    __fix_to_virt(idx), ioapic_phys);
 		idx++;
 
-		if (ioapic_res != NULL) {
-			ioapic_res->start = ioapic_phys;
-			ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
-			ioapic_res++;
-		}
+		ioapic_res->start = ioapic_phys;
+		ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+		ioapic_res++;
 	}
 }
 
@@ -4320,3 +4297,78 @@ void __init ioapic_insert_resources(void
 	}
 }
 #endif /* !CONFIG_XEN */
+
+int mp_find_ioapic(int gsi)
+{
+	int i = 0;
+
+	/* Find the IOAPIC that manages this GSI. */
+	for (i = 0; i < nr_ioapics; i++) {
+		if ((gsi >= mp_gsi_routing[i].gsi_base)
+		    && (gsi <= mp_gsi_routing[i].gsi_end))
+			return i;
+	}
+
+	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+	return -1;
+}
+
+int mp_find_ioapic_pin(int ioapic, int gsi)
+{
+	if (WARN_ON(ioapic == -1))
+		return -1;
+	if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end))
+		return -1;
+
+	return gsi - mp_gsi_routing[ioapic].gsi_base;
+}
+
+static int bad_ioapic(unsigned long address)
+{
+	if (nr_ioapics >= MAX_IO_APICS) {
+		printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
+		       "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
+		return 1;
+	}
+	if (!address) {
+		printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
+		       " found in table, skipping!\n");
+		return 1;
+	}
+	return 0;
+}
+
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+{
+	int idx = 0;
+
+	if (bad_ioapic(address))
+		return;
+
+	idx = nr_ioapics;
+
+	mp_ioapics[idx].type = MP_IOAPIC;
+	mp_ioapics[idx].flags = MPC_APIC_USABLE;
+	mp_ioapics[idx].apicaddr = address;
+
+#ifndef CONFIG_XEN
+	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+#endif
+	mp_ioapics[idx].apicid = io_apic_unique_id(id);
+	mp_ioapics[idx].apicver = io_apic_get_version(idx);
+
+	/*
+	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+	 */
+	mp_gsi_routing[idx].gsi_base = gsi_base;
+	mp_gsi_routing[idx].gsi_end = gsi_base +
+	    io_apic_get_redir_entries(idx);
+
+	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+	       "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
+	       mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
+	       mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end);
+
+	nr_ioapics++;
+}
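
The rewritten add_pin_to_irq_node_nopanic() above replaces the head/tail
special case with the pointer-to-pointer idiom: while scanning for
duplicates it tracks the address of the next-field that would receive a new
node, so a single *last = entry append covers the empty and non-empty lists
alike, and the function can return -ENOMEM instead of panicking. The same
idiom reduced to userspace with toy types:

#include <stdio.h>
#include <stdlib.h>

struct pin { int apic, pin; struct pin *next; };

#define for_each_pin(e, head) for (e = head; e; e = e->next)

static int add_pin(struct pin **head, int apic, int pinno)
{
	struct pin **last = head, *entry;

	for_each_pin(entry, *head) {
		if (entry->apic == apic && entry->pin == pinno)
			return 0;		/* duplicate: nothing to do */
		last = &entry->next;
	}

	entry = calloc(1, sizeof(*entry));
	if (!entry)
		return -1;			/* -ENOMEM in the kernel */
	entry->apic = apic;
	entry->pin = pinno;
	*last = entry;				/* append; empty list included */
	return 0;
}

int main(void)
{
	struct pin *head = NULL, *e;

	add_pin(&head, 0, 5);
	add_pin(&head, 1, 7);
	add_pin(&head, 0, 5);			/* ignored duplicate */
	for_each_pin(e, head)
		printf("%d:%d\n", e->apic, e->pin);
	return 0;
}
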
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/cpu/Makefile	2010-02-09 16:56:39.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/cpu/Makefile	2010-02-09 17:07:42.000000000 +0100
@@ -33,7 +33,7 @@ obj-$(CONFIG_CPU_FREQ)			+= cpufreq/
 
 obj-$(CONFIG_X86_LOCAL_APIC)		+= perfctr-watchdog.o
 
-disabled-obj-$(CONFIG_XEN) := hypervisor.o vmware.o
+disabled-obj-$(CONFIG_XEN) := hypervisor.o vmware.o sched.o
 
 quiet_cmd_mkcapflags = MKCAP   $@
       cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/cpu/amd.c	2010-01-18 16:53:52.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/cpu/amd.c	2010-01-18 16:55:14.000000000 +0100
@@ -336,7 +336,7 @@ static void __cpuinit amd_detect_cmp(str
 int amd_get_nb_id(int cpu)
 {
 	int id = 0;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
 	id = per_cpu(cpu_llc_id, cpu);
 #endif
 	return id;
@@ -492,8 +492,10 @@ static void __cpuinit init_amd(struct cp
 	if (c->x86 == 0x10 || c->x86 == 0x11)
 		set_cpu_cap(c, X86_FEATURE_REP_GOOD);
 
+#ifndef CONFIG_XEN
 	/* get apicid instead of initial apic id from cpuid */
 	c->apicid = hard_smp_processor_id();
+#endif
 #else
 
 	/*
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/cpu/common-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/cpu/common-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -13,13 +13,13 @@
 #include <linux/io.h>
 
 #include <asm/stackprotector.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 #include <asm/mmu_context.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
 #include <asm/sections.h>
-#include <asm/topology.h>
-#include <asm/cpumask.h>
+#include <linux/topology.h>
+#include <linux/cpumask.h>
 #include <asm/pgtable.h>
 #include <asm/atomic.h>
 #include <asm/proto.h>
@@ -28,13 +28,12 @@
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/mtrr.h>
-#include <asm/numa.h>
+#include <linux/numa.h>
 #include <asm/asm.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/pat.h>
-#include <asm/smp.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
@@ -102,17 +101,17 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
 	 * TLS descriptors are currently at a different place compared to i386.
 	 * Hopefully nobody expects them at a fixed place (Wine?)
 	 */
-	[GDT_ENTRY_KERNEL32_CS]		= { { { 0x0000ffff, 0x00cf9b00 } } },
-	[GDT_ENTRY_KERNEL_CS]		= { { { 0x0000ffff, 0x00af9b00 } } },
-	[GDT_ENTRY_KERNEL_DS]		= { { { 0x0000ffff, 0x00cf9300 } } },
-	[GDT_ENTRY_DEFAULT_USER32_CS]	= { { { 0x0000ffff, 0x00cffb00 } } },
-	[GDT_ENTRY_DEFAULT_USER_DS]	= { { { 0x0000ffff, 0x00cff300 } } },
-	[GDT_ENTRY_DEFAULT_USER_CS]	= { { { 0x0000ffff, 0x00affb00 } } },
+	[GDT_ENTRY_KERNEL32_CS]		= GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_CS]		= GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_DS]		= GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER32_CS]	= GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER_DS]	= GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER_CS]	= GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
 #else
-	[GDT_ENTRY_KERNEL_CS]		= { { { 0x0000ffff, 0x00cf9a00 } } },
-	[GDT_ENTRY_KERNEL_DS]		= { { { 0x0000ffff, 0x00cf9200 } } },
-	[GDT_ENTRY_DEFAULT_USER_CS]	= { { { 0x0000ffff, 0x00cffa00 } } },
-	[GDT_ENTRY_DEFAULT_USER_DS]	= { { { 0x0000ffff, 0x00cff200 } } },
+	[GDT_ENTRY_KERNEL_CS]		= GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
+	[GDT_ENTRY_KERNEL_DS]		= GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER_CS]	= GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
+	[GDT_ENTRY_DEFAULT_USER_DS]	= GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
 #ifndef CONFIG_XEN
 	/*
 	 * Segments used for calling PnP BIOS have byte granularity.
@@ -120,29 +119,29 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
 	 * the transfer segment sizes are set at run time.
 	 */
 	/* 32-bit code */
-	[GDT_ENTRY_PNPBIOS_CS32]	= { { { 0x0000ffff, 0x00409a00 } } },
+	[GDT_ENTRY_PNPBIOS_CS32]	= GDT_ENTRY_INIT(0x409a, 0, 0xffff),
 	/* 16-bit code */
-	[GDT_ENTRY_PNPBIOS_CS16]	= { { { 0x0000ffff, 0x00009a00 } } },
+	[GDT_ENTRY_PNPBIOS_CS16]	= GDT_ENTRY_INIT(0x009a, 0, 0xffff),
 	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_DS]		= { { { 0x0000ffff, 0x00009200 } } },
+	[GDT_ENTRY_PNPBIOS_DS]		= GDT_ENTRY_INIT(0x0092, 0, 0xffff),
 	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS1]		= { { { 0x00000000, 0x00009200 } } },
+	[GDT_ENTRY_PNPBIOS_TS1]		= GDT_ENTRY_INIT(0x0092, 0, 0),
 	/* 16-bit data */
-	[GDT_ENTRY_PNPBIOS_TS2]		= { { { 0x00000000, 0x00009200 } } },
+	[GDT_ENTRY_PNPBIOS_TS2]		= GDT_ENTRY_INIT(0x0092, 0, 0),
 	/*
 	 * The APM segments have byte granularity and their bases
 	 * are set at run time.  All have 64k limits.
 	 */
 	/* 32-bit code */
-	[GDT_ENTRY_APMBIOS_BASE]	= { { { 0x0000ffff, 0x00409a00 } } },
+	[GDT_ENTRY_APMBIOS_BASE]	= GDT_ENTRY_INIT(0x409a, 0, 0xffff),
 	/* 16-bit code */
-	[GDT_ENTRY_APMBIOS_BASE+1]	= { { { 0x0000ffff, 0x00009a00 } } },
+	[GDT_ENTRY_APMBIOS_BASE+1]	= GDT_ENTRY_INIT(0x009a, 0, 0xffff),
 	/* data */
-	[GDT_ENTRY_APMBIOS_BASE+2]	= { { { 0x0000ffff, 0x00409200 } } },
+	[GDT_ENTRY_APMBIOS_BASE+2]	= GDT_ENTRY_INIT(0x4092, 0, 0xffff),
 
-	[GDT_ENTRY_ESPFIX_SS]		= { { { 0x0000ffff, 0x00cf9200 } } },
+	[GDT_ENTRY_ESPFIX_SS]		= GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
 #endif
-	[GDT_ENTRY_PERCPU]		= { { { 0x0000ffff, 0x00cf9200 } } },
+	[GDT_ENTRY_PERCPU]		= GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
 	GDT_STACK_CANARY_INIT
 #endif
 } };
@@ -900,7 +899,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	init_hw_perf_counters();
+	init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1013,7 +1012,7 @@ __setup("clearcpuid=", setup_disablecpui
 
 #ifdef CONFIG_X86_64
 #ifndef CONFIG_X86_NO_IDT
-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
 #endif
 
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1027,13 +1026,21 @@ void xen_switch_pt(void)
 #endif
 }
 
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+/*
+ * The following four percpu variables are hot.  Align current_task to
+ * cacheline size such that all four fall in the same cacheline.
+ */
+DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
+	&init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
 	(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+	init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
 #ifndef CONFIG_X86_NO_TSS
@@ -1049,8 +1056,7 @@ static const unsigned int exception_stac
 };
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
-	__aligned(PAGE_SIZE);
+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
 #endif
 
 void __cpuinit syscall_init(void)
@@ -1097,8 +1103,11 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist
 
 #else	/* CONFIG_X86_64 */
 
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
-DEFINE_PER_CPU(unsigned long, stack_canary);
+DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 
 /* Make sure %fs and %gs are initialized properly in idle threads */
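
The GDT conversion above swaps raw descriptor word pairs for
GDT_ENTRY_INIT(flags, base, limit), which packs the same bits by name. The
sketch below shows one plausible packing consistent with the values in this
hunk (an assumption for illustration; the authoritative macro is in
arch/x86/include/asm/desc_defs.h), verifying that GDT_ENTRY_INIT(0xc09b, 0,
0xfffff) reproduces the old { 0x0000ffff, 0x00cf9b00 } pair:

#include <assert.h>
#include <stdint.h>

struct gdt_words { uint32_t a, b; };

static struct gdt_words gdt_entry_init(uint16_t flags, uint32_t base,
				       uint32_t limit)
{
	struct gdt_words w;

	w.a = (limit & 0xffff) | ((base & 0xffff) << 16);
	w.b = ((base >> 16) & 0xff) | ((uint32_t)(flags & 0xf0ff) << 8) |
	      (limit & 0xf0000) | (base & 0xff000000);
	return w;
}

int main(void)
{
	/* 64-bit flat kernel code segment from the table above */
	struct gdt_words w = gdt_entry_init(0xc09b, 0, 0xfffff);

	assert(w.a == 0x0000ffff && w.b == 0x00cf9b00);
	return 0;
}
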
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/arch/x86/kernel/cpu/mcheck/mce-inject.c	2009-11-06 10:52:22.000000000 +0100
@@ -143,7 +143,7 @@ static void raise_mce(struct mce *m)
 	if (context == MCJ_CTX_RANDOM)
 		return;
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
 	if (m->inject_flags & MCJ_NMI_BROADCAST) {
 		unsigned long start;
 		int cpu;
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/cpu/mtrr/main-xen.c	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/cpu/mtrr/main-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -1,10 +1,9 @@
-#include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <linux/ctype.h>
+#define DEBUG
+
+#include <linux/uaccess.h>
 #include <linux/module.h>
-#include <linux/seq_file.h>
-#include <asm/uaccess.h>
 #include <linux/mutex.h>
+#include <linux/init.h>
 
 #include <asm/mtrr.h>
 #include "mtrr.h"
@@ -58,7 +57,7 @@ static void __init init_table(void)
 		mtrr_usage_table[i] = 0;
 }
 
-int mtrr_add_page(unsigned long base, unsigned long size, 
+int mtrr_add_page(unsigned long base, unsigned long size,
 		  unsigned int type, bool increment)
 {
 	int error;
@@ -88,25 +87,23 @@ int mtrr_add_page(unsigned long base, un
 static int mtrr_check(unsigned long base, unsigned long size)
 {
 	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
-		printk(KERN_WARNING
-			"mtrr: size and base must be multiples of 4 kiB\n");
-		printk(KERN_DEBUG
-			"mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
+		pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+		pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", size, base);
 		dump_stack();
 		return -1;
 	}
 	return 0;
 }
 
-int
-mtrr_add(unsigned long base, unsigned long size, unsigned int type,
-	 bool increment)
+int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+	     bool increment)
 {
 	if (mtrr_check(base, size))
 		return -EINVAL;
 	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
 			     increment);
 }
+EXPORT_SYMBOL(mtrr_add);
 
 int mtrr_del_page(int reg, unsigned long base, unsigned long size)
 {
@@ -128,13 +125,13 @@ int mtrr_del_page(int reg, unsigned long
 			}
 		}
 		if (reg < 0) {
-			printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
-			       size);
+			pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
+				 base, size);
 			goto out;
 		}
 	}
 	if (mtrr_usage_table[reg] < 1) {
-		printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+		pr_warning("mtrr: reg: %d has count=0\n", reg);
 		goto out;
 	}
 	if (--mtrr_usage_table[reg] < 1) {
@@ -153,15 +150,12 @@ int mtrr_del_page(int reg, unsigned long
 	return error;
 }
 
-int
-mtrr_del(int reg, unsigned long base, unsigned long size)
+int mtrr_del(int reg, unsigned long base, unsigned long size)
 {
 	if (mtrr_check(base, size))
 		return -EINVAL;
 	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
 }
-
-EXPORT_SYMBOL(mtrr_add);
 EXPORT_SYMBOL(mtrr_del);
 
 /*
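
The new "#define DEBUG" at the top of the mtrr file is what makes the
pr_debug() calls introduced here actually print: without DEBUG (or dynamic
debug) defined before the headers are included, pr_debug() compiles to
nothing. A minimal model of that mechanism:

#include <stdio.h>

#define DEBUG			/* must precede the pr_debug definition */

#ifdef DEBUG
#define pr_debug(fmt, ...) printf("DEBUG: " fmt, ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) do { } while (0)
#endif

int main(void)
{
	pr_debug("mtrr: size: 0x%lx  base: 0x%lx\n", 4096UL, 0UL);
	return 0;
}
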
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/e820-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/e820-xen.c	2009-12-04 11:31:40.000000000 +0100
@@ -134,7 +134,7 @@ static void __init __e820_add_region(str
 {
 	int x = e820x->nr_map;
 
-	if (x == ARRAY_SIZE(e820x->map)) {
+	if (x >= ARRAY_SIZE(e820x->map)) {
 		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
 		return;
 	}
@@ -1455,7 +1455,7 @@ void __init e820_reserve_resources(void)
 	struct resource *res;
 	u64 end;
 
-	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
+	res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
 	e820_res = res;
 	for (i = 0; i < e820.nr_map; i++) {
 		end = e820.map[i].addr + e820.map[i].size - 1;
@@ -1502,8 +1502,8 @@ static unsigned long ram_alignment(resou
 	if (mb < 16)
 		return 1024*1024;
 
-	/* To 32MB for anything above that */
-	return 32*1024*1024;
+	/* To 64MB for anything above that */
+	return 64*1024*1024;
 }
 
 #define MAX_RESOURCE_SIZE ((resource_size_t)-1)
@@ -1543,59 +1543,8 @@ void __init e820_reserve_resources_late(
 
 #undef e820
 
-#ifndef CONFIG_XEN
 char *__init default_machine_specific_memory_setup(void)
 {
-	char *who = "BIOS-e820";
-	u32 new_nr;
-	/*
-	 * Try to copy the BIOS-supplied E820-map.
-	 *
-	 * Otherwise fake a memory map; one section from 0k->640k,
-	 * the next section from 1mb->appropriate_mem_k
-	 */
-	new_nr = boot_params.e820_entries;
-	sanitize_e820_map(boot_params.e820_map,
-			ARRAY_SIZE(boot_params.e820_map),
-			&new_nr);
-	boot_params.e820_entries = new_nr;
-	if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
-	  < 0) {
-		u64 mem_size;
-
-		/* compare results from other methods and take the greater */
-		if (boot_params.alt_mem_k
-		    < boot_params.screen_info.ext_mem_k) {
-			mem_size = boot_params.screen_info.ext_mem_k;
-			who = "BIOS-88";
-		} else {
-			mem_size = boot_params.alt_mem_k;
-			who = "BIOS-e801";
-		}
-
-		e820.nr_map = 0;
-		e820_add_region(0, LOWMEMSIZE(), E820_RAM);
-		e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
-	}
-
-	/* In case someone cares... */
-	return who;
-}
-
-char *__init __attribute__((weak)) machine_specific_memory_setup(void)
-{
-	if (x86_quirks->arch_memory_setup) {
-		char *who = x86_quirks->arch_memory_setup();
-
-		if (who)
-			return who;
-	}
-	return default_machine_specific_memory_setup();
-}
-#endif
-
-static char * __init _memory_setup(void)
-{
 	int rc, nr_map;
 	struct xen_memory_map memmap;
 	static struct e820entry __initdata map[E820MAX];
@@ -1639,7 +1588,7 @@ void __init setup_memory_map(void)
 {
 	char *who;
 
-	who = _memory_setup();
+	who = x86_init.resources.memory_setup();
 #ifdef CONFIG_XEN
 	if (is_initial_xendomain()) {
 		printk(KERN_INFO "Xen-provided machine memory map:\n");
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/early_printk-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/early_printk-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -178,7 +178,6 @@ static __init void early_serial_init(cha
  * mappings. Someone should fix this for domain 0. For now, use fake serial.
  */
 #define early_vga_console early_serial_console
-#define xenboot_console early_serial_console
 
 #endif
 
@@ -189,721 +188,6 @@ static struct console early_serial_conso
 	.index =	-1,
 };
 
-#ifdef CONFIG_EARLY_PRINTK_DBGP
-
-static struct ehci_caps __iomem *ehci_caps;
-static struct ehci_regs __iomem *ehci_regs;
-static struct ehci_dbg_port __iomem *ehci_debug;
-static unsigned int dbgp_endpoint_out;
-
-struct ehci_dev {
-	u32 bus;
-	u32 slot;
-	u32 func;
-};
-
-static struct ehci_dev ehci_dev;
-
-#define USB_DEBUG_DEVNUM 127
-
-#define DBGP_DATA_TOGGLE	0x8800
-
-static inline u32 dbgp_pid_update(u32 x, u32 tok)
-{
-	return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
-}
-
-static inline u32 dbgp_len_update(u32 x, u32 len)
-{
-	return (x & ~0x0f) | (len & 0x0f);
-}
-
-/*
- * USB Packet IDs (PIDs)
- */
-
-/* token */
-#define USB_PID_OUT		0xe1
-#define USB_PID_IN		0x69
-#define USB_PID_SOF		0xa5
-#define USB_PID_SETUP		0x2d
-/* handshake */
-#define USB_PID_ACK		0xd2
-#define USB_PID_NAK		0x5a
-#define USB_PID_STALL		0x1e
-#define USB_PID_NYET		0x96
-/* data */
-#define USB_PID_DATA0		0xc3
-#define USB_PID_DATA1		0x4b
-#define USB_PID_DATA2		0x87
-#define USB_PID_MDATA		0x0f
-/* Special */
-#define USB_PID_PREAMBLE	0x3c
-#define USB_PID_ERR		0x3c
-#define USB_PID_SPLIT		0x78
-#define USB_PID_PING		0xb4
-#define USB_PID_UNDEF_0		0xf0
-
-#define USB_PID_DATA_TOGGLE	0x88
-#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
-
-#define PCI_CAP_ID_EHCI_DEBUG	0xa
-
-#define HUB_ROOT_RESET_TIME	50	/* times are in msec */
-#define HUB_SHORT_RESET_TIME	10
-#define HUB_LONG_RESET_TIME	200
-#define HUB_RESET_TIMEOUT	500
-
-#define DBGP_MAX_PACKET		8
-
-static int dbgp_wait_until_complete(void)
-{
-	u32 ctrl;
-	int loop = 0x100000;
-
-	do {
-		ctrl = readl(&ehci_debug->control);
-		/* Stop when the transaction is finished */
-		if (ctrl & DBGP_DONE)
-			break;
-	} while (--loop > 0);
-
-	if (!loop)
-		return -1;
-
-	/*
-	 * Now that we have observed the completed transaction,
-	 * clear the done bit.
-	 */
-	writel(ctrl | DBGP_DONE, &ehci_debug->control);
-	return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
-}
-
-static void __init dbgp_mdelay(int ms)
-{
-	int i;
-
-	while (ms--) {
-		for (i = 0; i < 1000; i++)
-			outb(0x1, 0x80);
-	}
-}
-
-static void dbgp_breath(void)
-{
-	/* Sleep to give the debug port a chance to breathe */
-}
-
-static int dbgp_wait_until_done(unsigned ctrl)
-{
-	u32 pids, lpid;
-	int ret;
-	int loop = 3;
-
-retry:
-	writel(ctrl | DBGP_GO, &ehci_debug->control);
-	ret = dbgp_wait_until_complete();
-	pids = readl(&ehci_debug->pids);
-	lpid = DBGP_PID_GET(pids);
-
-	if (ret < 0)
-		return ret;
-
-	/*
-	 * If the port is getting full or it has dropped data
-	 * start pacing ourselves, not necessary but it's friendly.
-	 */
-	if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
-		dbgp_breath();
-
-	/* If I get a NACK reissue the transmission */
-	if (lpid == USB_PID_NAK) {
-		if (--loop > 0)
-			goto retry;
-	}
-
-	return ret;
-}
-
-static void dbgp_set_data(const void *buf, int size)
-{
-	const unsigned char *bytes = buf;
-	u32 lo, hi;
-	int i;
-
-	lo = hi = 0;
-	for (i = 0; i < 4 && i < size; i++)
-		lo |= bytes[i] << (8*i);
-	for (; i < 8 && i < size; i++)
-		hi |= bytes[i] << (8*(i - 4));
-	writel(lo, &ehci_debug->data03);
-	writel(hi, &ehci_debug->data47);
-}
-
-static void __init dbgp_get_data(void *buf, int size)
-{
-	unsigned char *bytes = buf;
-	u32 lo, hi;
-	int i;
-
-	lo = readl(&ehci_debug->data03);
-	hi = readl(&ehci_debug->data47);
-	for (i = 0; i < 4 && i < size; i++)
-		bytes[i] = (lo >> (8*i)) & 0xff;
-	for (; i < 8 && i < size; i++)
-		bytes[i] = (hi >> (8*(i - 4))) & 0xff;
-}
-
-static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
-			 const char *bytes, int size)
-{
-	u32 pids, addr, ctrl;
-	int ret;
-
-	if (size > DBGP_MAX_PACKET)
-		return -1;
-
-	addr = DBGP_EPADDR(devnum, endpoint);
-
-	pids = readl(&ehci_debug->pids);
-	pids = dbgp_pid_update(pids, USB_PID_OUT);
-
-	ctrl = readl(&ehci_debug->control);
-	ctrl = dbgp_len_update(ctrl, size);
-	ctrl |= DBGP_OUT;
-	ctrl |= DBGP_GO;
-
-	dbgp_set_data(bytes, size);
-	writel(addr, &ehci_debug->address);
-	writel(pids, &ehci_debug->pids);
-
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
-
-	return ret;
-}
-
-static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
-				 int size)
-{
-	u32 pids, addr, ctrl;
-	int ret;
-
-	if (size > DBGP_MAX_PACKET)
-		return -1;
-
-	addr = DBGP_EPADDR(devnum, endpoint);
-
-	pids = readl(&ehci_debug->pids);
-	pids = dbgp_pid_update(pids, USB_PID_IN);
-
-	ctrl = readl(&ehci_debug->control);
-	ctrl = dbgp_len_update(ctrl, size);
-	ctrl &= ~DBGP_OUT;
-	ctrl |= DBGP_GO;
-
-	writel(addr, &ehci_debug->address);
-	writel(pids, &ehci_debug->pids);
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
-
-	if (size > ret)
-		size = ret;
-	dbgp_get_data(data, size);
-	return ret;
-}
-
-static int __init dbgp_control_msg(unsigned devnum, int requesttype,
-	int request, int value, int index, void *data, int size)
-{
-	u32 pids, addr, ctrl;
-	struct usb_ctrlrequest req;
-	int read;
-	int ret;
-
-	read = (requesttype & USB_DIR_IN) != 0;
-	if (size > (read ? DBGP_MAX_PACKET:0))
-		return -1;
-
-	/* Compute the control message */
-	req.bRequestType = requesttype;
-	req.bRequest = request;
-	req.wValue = cpu_to_le16(value);
-	req.wIndex = cpu_to_le16(index);
-	req.wLength = cpu_to_le16(size);
-
-	pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
-	addr = DBGP_EPADDR(devnum, 0);
-
-	ctrl = readl(&ehci_debug->control);
-	ctrl = dbgp_len_update(ctrl, sizeof(req));
-	ctrl |= DBGP_OUT;
-	ctrl |= DBGP_GO;
-
-	/* Send the setup message */
-	dbgp_set_data(&req, sizeof(req));
-	writel(addr, &ehci_debug->address);
-	writel(pids, &ehci_debug->pids);
-	ret = dbgp_wait_until_done(ctrl);
-	if (ret < 0)
-		return ret;
-
-	/* Read the result */
-	return dbgp_bulk_read(devnum, 0, data, size);
-}
-
-
-/* Find a PCI capability */
-static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
-{
-	u8 pos;
-	int bytes;
-
-	if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
-		PCI_STATUS_CAP_LIST))
-		return 0;
-
-	pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
-	for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
-		u8 id;
-
-		pos &= ~3;
-		id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
-		if (id == 0xff)
-			break;
-		if (id == cap)
-			return pos;
-
-		pos = read_pci_config_byte(num, slot, func,
-						 pos+PCI_CAP_LIST_NEXT);
-	}
-	return 0;
-}
-
-static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
-{
-	u32 class;
-
-	class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
-	if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
-		return 0;
-
-	return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
-}
-
-static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
-{
-	u32 bus, slot, func;
-
-	for (bus = 0; bus < 256; bus++) {
-		for (slot = 0; slot < 32; slot++) {
-			for (func = 0; func < 8; func++) {
-				unsigned cap;
-
-				cap = __find_dbgp(bus, slot, func);
-
-				if (!cap)
-					continue;
-				if (ehci_num-- != 0)
-					continue;
-				*rbus = bus;
-				*rslot = slot;
-				*rfunc = func;
-				return cap;
-			}
-		}
-	}
-	return 0;
-}
-
-static int __init ehci_reset_port(int port)
-{
-	u32 portsc;
-	u32 delay_time, delay;
-	int loop;
-
-	/* Reset the usb debug port */
-	portsc = readl(&ehci_regs->port_status[port - 1]);
-	portsc &= ~PORT_PE;
-	portsc |= PORT_RESET;
-	writel(portsc, &ehci_regs->port_status[port - 1]);
-
-	delay = HUB_ROOT_RESET_TIME;
-	for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
-	     delay_time += delay) {
-		dbgp_mdelay(delay);
-
-		portsc = readl(&ehci_regs->port_status[port - 1]);
-		if (portsc & PORT_RESET) {
-			/* force reset to complete */
-			loop = 2;
-			writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
-				&ehci_regs->port_status[port - 1]);
-			do {
-				portsc = readl(&ehci_regs->port_status[port-1]);
-			} while ((portsc & PORT_RESET) && (--loop > 0));
-		}
-
-		/* Device went away? */
-		if (!(portsc & PORT_CONNECT))
-			return -ENOTCONN;
-
-		/* bomb out completely if something weird happend */
-		if ((portsc & PORT_CSC))
-			return -EINVAL;
-
-		/* If we've finished resetting, then break out of the loop */
-		if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
-			return 0;
-	}
-	return -EBUSY;
-}
-
-static int __init ehci_wait_for_port(int port)
-{
-	u32 status;
-	int ret, reps;
-
-	for (reps = 0; reps < 3; reps++) {
-		dbgp_mdelay(100);
-		status = readl(&ehci_regs->status);
-		if (status & STS_PCD) {
-			ret = ehci_reset_port(port);
-			if (ret == 0)
-				return 0;
-		}
-	}
-	return -ENOTCONN;
-}
-
-#ifdef DBGP_DEBUG
-# define dbgp_printk early_printk
-#else
-static inline void dbgp_printk(const char *fmt, ...) { }
-#endif
-
-typedef void (*set_debug_port_t)(int port);
-
-static void __init default_set_debug_port(int port)
-{
-}
-
-static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
-
-static void __init nvidia_set_debug_port(int port)
-{
-	u32 dword;
-	dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
-				 0x74);
-	dword &= ~(0x0f<<12);
-	dword |= ((port & 0x0f)<<12);
-	write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
-				 dword);
-	dbgp_printk("set debug port to %d\n", port);
-}
-
-static void __init detect_set_debug_port(void)
-{
-	u32 vendorid;
-
-	vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
-		 0x00);
-
-	if ((vendorid & 0xffff) == 0x10de) {
-		dbgp_printk("using nvidia set_debug_port\n");
-		set_debug_port = nvidia_set_debug_port;
-	}
-}
-
-static int __init ehci_setup(void)
-{
-	struct usb_debug_descriptor dbgp_desc;
-	u32 cmd, ctrl, status, portsc, hcs_params;
-	u32 debug_port, new_debug_port = 0, n_ports;
-	u32  devnum;
-	int ret, i;
-	int loop;
-	int port_map_tried;
-	int playtimes = 3;
-
-try_next_time:
-	port_map_tried = 0;
-
-try_next_port:
-
-	hcs_params = readl(&ehci_caps->hcs_params);
-	debug_port = HCS_DEBUG_PORT(hcs_params);
-	n_ports    = HCS_N_PORTS(hcs_params);
-
-	dbgp_printk("debug_port: %d\n", debug_port);
-	dbgp_printk("n_ports:    %d\n", n_ports);
-
-	for (i = 1; i <= n_ports; i++) {
-		portsc = readl(&ehci_regs->port_status[i-1]);
-		dbgp_printk("portstatus%d: %08x\n", i, portsc);
-	}
-
-	if (port_map_tried && (new_debug_port != debug_port)) {
-		if (--playtimes) {
-			set_debug_port(new_debug_port);
-			goto try_next_time;
-		}
-		return -1;
-	}
-
-	loop = 10;
-	/* Reset the EHCI controller */
-	cmd = readl(&ehci_regs->command);
-	cmd |= CMD_RESET;
-	writel(cmd, &ehci_regs->command);
-	do {
-		cmd = readl(&ehci_regs->command);
-	} while ((cmd & CMD_RESET) && (--loop > 0));
-
-	if (!loop) {
-		dbgp_printk("can not reset ehci\n");
-		return -1;
-	}
-	dbgp_printk("ehci reset done\n");
-
-	/* Claim ownership, but do not enable yet */
-	ctrl = readl(&ehci_debug->control);
-	ctrl |= DBGP_OWNER;
-	ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
-	writel(ctrl, &ehci_debug->control);
-
-	/* Start the ehci running */
-	cmd = readl(&ehci_regs->command);
-	cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
-	cmd |= CMD_RUN;
-	writel(cmd, &ehci_regs->command);
-
-	/* Ensure everything is routed to the EHCI */
-	writel(FLAG_CF, &ehci_regs->configured_flag);
-
-	/* Wait until the controller is no longer halted */
-	loop = 10;
-	do {
-		status = readl(&ehci_regs->status);
-	} while ((status & STS_HALT) && (--loop > 0));
-
-	if (!loop) {
-		dbgp_printk("ehci can be started\n");
-		return -1;
-	}
-	dbgp_printk("ehci started\n");
-
-	/* Wait for a device to show up in the debug port */
-	ret = ehci_wait_for_port(debug_port);
-	if (ret < 0) {
-		dbgp_printk("No device found in debug port\n");
-		goto next_debug_port;
-	}
-	dbgp_printk("ehci wait for port done\n");
-
-	/* Enable the debug port */
-	ctrl = readl(&ehci_debug->control);
-	ctrl |= DBGP_CLAIM;
-	writel(ctrl, &ehci_debug->control);
-	ctrl = readl(&ehci_debug->control);
-	if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
-		dbgp_printk("No device in debug port\n");
-		writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
-		goto err;
-	}
-	dbgp_printk("debug ported enabled\n");
-
-	/* Completely transfer the debug device to the debug controller */
-	portsc = readl(&ehci_regs->port_status[debug_port - 1]);
-	portsc &= ~PORT_PE;
-	writel(portsc, &ehci_regs->port_status[debug_port - 1]);
-
-	dbgp_mdelay(100);
-
-	/* Find the debug device and make it device number 127 */
-	for (devnum = 0; devnum <= 127; devnum++) {
-		ret = dbgp_control_msg(devnum,
-			USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
-			&dbgp_desc, sizeof(dbgp_desc));
-		if (ret > 0)
-			break;
-	}
-	if (devnum > 127) {
-		dbgp_printk("Could not find attached debug device\n");
-		goto err;
-	}
-	if (ret < 0) {
-		dbgp_printk("Attached device is not a debug device\n");
-		goto err;
-	}
-	dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
-
-	/* Move the device to 127 if it isn't already there */
-	if (devnum != USB_DEBUG_DEVNUM) {
-		ret = dbgp_control_msg(devnum,
-			USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-			USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
-		if (ret < 0) {
-			dbgp_printk("Could not move attached device to %d\n",
-				USB_DEBUG_DEVNUM);
-			goto err;
-		}
-		devnum = USB_DEBUG_DEVNUM;
-		dbgp_printk("debug device renamed to 127\n");
-	}
-
-	/* Enable the debug interface */
-	ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
-		USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
-		USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
-	if (ret < 0) {
-		dbgp_printk(" Could not enable the debug device\n");
-		goto err;
-	}
-	dbgp_printk("debug interface enabled\n");
-
-	/* Perform a small write to get the even/odd data state in sync
-	 */
-	ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
-	if (ret < 0) {
-		dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
-		goto err;
-	}
-	dbgp_printk("small write doned\n");
-
-	return 0;
-err:
-	/* Things didn't work so remove my claim */
-	ctrl = readl(&ehci_debug->control);
-	ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
-	writel(ctrl, &ehci_debug->control);
-	return -1;
-
-next_debug_port:
-	port_map_tried |= (1<<(debug_port - 1));
-	new_debug_port = ((debug_port-1+1)%n_ports) + 1;
-	if (port_map_tried != ((1<<n_ports) - 1)) {
-		set_debug_port(new_debug_port);
-		goto try_next_port;
-	}
-	if (--playtimes) {
-		set_debug_port(new_debug_port);
-		goto try_next_time;
-	}
-
-	return -1;
-}
-
-static int __init early_dbgp_init(char *s)
-{
-	u32 debug_port, bar, offset;
-	u32 bus, slot, func, cap;
-	void __iomem *ehci_bar;
-	u32 dbgp_num;
-	u32 bar_val;
-	char *e;
-	int ret;
-	u8 byte;
-
-	if (!early_pci_allowed())
-		return -1;
-
-	dbgp_num = 0;
-	if (*s)
-		dbgp_num = simple_strtoul(s, &e, 10);
-	dbgp_printk("dbgp_num: %d\n", dbgp_num);
-
-	cap = find_dbgp(dbgp_num, &bus, &slot, &func);
-	if (!cap)
-		return -1;
-
-	dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
-			 func);
-
-	debug_port = read_pci_config(bus, slot, func, cap);
-	bar = (debug_port >> 29) & 0x7;
-	bar = (bar * 4) + 0xc;
-	offset = (debug_port >> 16) & 0xfff;
-	dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
-	if (bar != PCI_BASE_ADDRESS_0) {
-		dbgp_printk("only debug ports on bar 1 handled.\n");
-
-		return -1;
-	}
-
-	bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
-	dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
-	if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
-		dbgp_printk("only simple 32bit mmio bars supported\n");
-
-		return -1;
-	}
-
-	/* double check if the mem space is enabled */
-	byte = read_pci_config_byte(bus, slot, func, 0x04);
-	if (!(byte & 0x2)) {
-		byte  |= 0x02;
-		write_pci_config_byte(bus, slot, func, 0x04, byte);
-		dbgp_printk("mmio for ehci enabled\n");
-	}
-
-	/*
-	 * FIXME I don't have the bar size so just guess PAGE_SIZE is more
-	 * than enough.  1K is the biggest I have seen.
-	 */
-	set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
-	ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
-	ehci_bar += bar_val & ~PAGE_MASK;
-	dbgp_printk("ehci_bar: %p\n", ehci_bar);
-
-	ehci_caps  = ehci_bar;
-	ehci_regs  = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
-	ehci_debug = ehci_bar + offset;
-	ehci_dev.bus = bus;
-	ehci_dev.slot = slot;
-	ehci_dev.func = func;
-
-	detect_set_debug_port();
-
-	ret = ehci_setup();
-	if (ret < 0) {
-		dbgp_printk("ehci_setup failed\n");
-		ehci_debug = NULL;
-
-		return -1;
-	}
-
-	return 0;
-}
-
-static void early_dbgp_write(struct console *con, const char *str, u32 n)
-{
-	int chunk, ret;
-
-	if (!ehci_debug)
-		return;
-	while (n > 0) {
-		chunk = n;
-		if (chunk > DBGP_MAX_PACKET)
-			chunk = DBGP_MAX_PACKET;
-		ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
-			dbgp_endpoint_out, str, chunk);
-		str += chunk;
-		n -= chunk;
-	}
-}
-
-static struct console early_dbgp_console = {
-	.name =		"earlydbg",
-	.write =	early_dbgp_write,
-	.flags =	CON_PRINTBUFFER,
-	.index =	-1,
-};
-#endif
-
 /* Direct interface for emergencies */
 static struct console *early_console = &early_vga_console;
 static int __initdata early_console_initialized;
@@ -920,10 +204,24 @@ asmlinkage void early_printk(const char 
 	va_end(ap);
 }
 
+static inline void early_console_register(struct console *con, int keep_early)
+{
+	if (early_console->index != -1) {
+		printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
+		       con->name);
+		return;
+	}
+	early_console = con;
+	if (keep_early)
+		early_console->flags &= ~CON_BOOT;
+	else
+		early_console->flags |= CON_BOOT;
+	register_console(early_console);
+}
 
 static int __init setup_early_printk(char *buf)
 {
-	int keep_early;
+	int keep;
 
 	if (!buf)
 		return 0;
@@ -932,44 +230,41 @@ static int __init setup_early_printk(cha
 		return 0;
 	early_console_initialized = 1;
 
-	keep_early = (strstr(buf, "keep") != NULL);
+	keep = (strstr(buf, "keep") != NULL);
 
-	if (!strncmp(buf, "serial", 6)) {
-		early_serial_init(buf + 6);
-		early_console = &early_serial_console;
-	} else if (!strncmp(buf, "ttyS", 4)) {
-		early_serial_init(buf);
-		early_console = &early_serial_console;
-	} else if (!strncmp(buf, "vga", 3)) {
+	while (*buf != '\0') {
+		if (!strncmp(buf, "serial", 6)) {
+			buf += 6;
+			early_serial_init(buf);
+			early_console_register(&early_serial_console, keep);
+			if (!strncmp(buf, ",ttyS", 5))
+				buf += 5;
+		}
+		if (!strncmp(buf, "ttyS", 4)) {
+			early_serial_init(buf + 4);
+			early_console_register(&early_serial_console, keep);
+		}
 #ifndef CONFIG_XEN
-		&& boot_params.screen_info.orig_video_isVGA == 1) {
-		max_xpos = boot_params.screen_info.orig_video_cols;
-		max_ypos = boot_params.screen_info.orig_video_lines;
-		current_ypos = boot_params.screen_info.orig_y;
+		if (!strncmp(buf, "vga", 3) &&
+		    boot_params.screen_info.orig_video_isVGA == 1) {
+			max_xpos = boot_params.screen_info.orig_video_cols;
+			max_ypos = boot_params.screen_info.orig_video_lines;
+			current_ypos = boot_params.screen_info.orig_y;
+#else
+		if (!strncmp(buf, "vga", 3) || !strncmp(buf, "xen", 3)) {
 #endif
-		early_console = &early_vga_console;
+			early_console_register(&early_vga_console, keep);
+		}
 #ifdef CONFIG_EARLY_PRINTK_DBGP
-	} else if (!strncmp(buf, "dbgp", 4)) {
-		if (early_dbgp_init(buf+4) < 0)
-			return 0;
-		early_console = &early_dbgp_console;
-		/*
-		 * usb subsys will reset ehci controller, so don't keep
-		 * that early console
-		 */
-		keep_early = 0;
+		if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4))
+			early_console_register(&early_dbgp_console, keep);
 #endif
-#ifdef CONFIG_XEN
-	} else if (!strncmp(buf, "xen", 3)) {
-		early_console = &xenboot_console;
+#ifdef CONFIG_HVC_XEN
+		if (!strncmp(buf, "xen", 3))
+			early_console_register(&xenboot_console, keep);
 #endif
+		buf++;
 	}
-
-	if (keep_early)
-		early_console->flags &= ~CON_BOOT;
-	else
-		early_console->flags |= CON_BOOT;
-	register_console(early_console);
 	return 0;
 }
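
Review note: the rewritten setup_early_printk() scans the whole option string, so one earlyprintk= option can register several consoles, and early_console_register() centralizes the keep/CON_BOOT handling. Illustrative values (which forms are honored depends on CONFIG_XEN, CONFIG_EARLY_PRINTK_DBGP and CONFIG_HVC_XEN; the devices here are made up):

	earlyprintk=serial,ttyS0,115200,keep
	earlyprintk=ttyS1,9600
	earlyprintk=vga
	earlyprintk=dbgp0
	earlyprintk=xen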
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/entry_64-xen.S	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/entry_64-xen.S	2009-11-06 10:52:22.000000000 +0100
@@ -53,6 +53,7 @@
 #include <asm/hw_irq.h>
 #include <asm/page_types.h>
 #include <asm/irqflags.h>
+#include <asm/processor-flags.h>
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <xen/interface/xen.h>
@@ -150,7 +151,7 @@ ENTRY(ftrace_graph_caller)
 END(ftrace_graph_caller)
 
 GLOBAL(return_to_handler)
-	subq  $80, %rsp
+	subq  $24, %rsp
 
 	/* Save the return values */
 	movq %rax, (%rsp)
@@ -159,10 +160,10 @@ GLOBAL(return_to_handler)
 
 	call ftrace_return_to_handler
 
-	movq %rax, 72(%rsp)
+	movq %rax, 16(%rsp)
 	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
-	addq $72, %rsp
+	addq $16, %rsp
 	retq
 #endif
 
@@ -553,20 +554,13 @@ sysret_signal:
 	bt $TIF_SYSCALL_AUDIT,%edx
 	jc sysret_audit
 #endif
-	/* edx:	work flags (arg3) */
-	leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
-	xorl %esi,%esi # oldset -> arg2
-	SAVE_REST
-	FIXUP_TOP_OF_STACK %r11
-	call do_notify_resume
-	RESTORE_TOP_OF_STACK %r11
-	RESTORE_REST
-	movl $_TIF_WORK_MASK,%edi
-	/* Use IRET because user could have changed frame. This
-	   works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
-	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
-	jmp int_with_check
+	/*
+	 * We have a signal, or exit tracing or single-step.
+	 * These all wind up with the iret return path anyway,
+	 * so just join that path right now.
+	 */
+	FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
+	jmp int_check_syscall_exit_work
 
 badsys:
 	movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -675,6 +669,7 @@ int_careful:
 int_very_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
+int_check_syscall_exit_work:
 	SAVE_REST
 	/* Check for syscall exit trace */
 	testl $_TIF_WORK_SYSCALL_EXIT,%edx
@@ -921,7 +916,7 @@ apicinterrupt ERROR_APIC_VECTOR \
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 apicinterrupt LOCAL_PENDING_VECTOR \
 	perf_pending_interrupt smp_perf_pending_interrupt
 #endif
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/head-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/head-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -59,7 +59,6 @@ void __init reserve_ebda_region(void)
 #include <asm/fixmap.h>
 #include <asm/pgtable.h>
 #include <asm/sections.h>
-#include <asm/setup_arch.h>
 #include <xen/interface/callback.h>
 #include <xen/interface/memory.h>
 
@@ -163,7 +162,7 @@ void __init xen_start_kernel(void)
 
 }
 
-void __init machine_specific_arch_setup(void)
+void __init xen_arch_setup(void)
 {
 	int ret;
 	static const struct callback_register __initconst event = {
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/head32-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/head32-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -9,11 +9,26 @@
 #include <linux/start_kernel.h>
 
 #include <asm/setup.h>
-#include <asm/setup_arch.h>
 #include <asm/sections.h>
 #include <asm/e820.h>
-#include <asm/bios_ebda.h>
+#include <asm/page.h>
 #include <asm/trampoline.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/bios_ebda.h>
+
+static void __init i386_default_early_setup(void)
+{
+	/* Initialize 32bit specific setup functions */
+	if (is_initial_xendomain())
+		x86_init.resources.probe_roms = probe_roms;
+	x86_init.resources.reserve_resources = i386_reserve_resources;
+#ifndef CONFIG_XEN
+	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
+
+	reserve_ebda_region();
+#endif
+}
 
 void __init i386_start_kernel(void)
 {
@@ -31,7 +46,16 @@ void __init i386_start_kernel(void)
 		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
 #endif
-	reserve_ebda_region();
+
+	/* Call the subarch specific early setup function */
+	switch (boot_params.hdr.hardware_subarch) {
+	case X86_SUBARCH_MRST:
+		x86_mrst_early_setup();
+		break;
+	default:
+		i386_default_early_setup();
+		break;
+	}
 #else
 	{
 		int max_cmdline;
@@ -42,6 +66,7 @@ void __init i386_start_kernel(void)
 		boot_command_line[max_cmdline-1] = '\0';
 	}
 
+	i386_default_early_setup();
 	xen_start_kernel();
 #endif
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/head64-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/head64-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -20,15 +20,14 @@
 #include <asm/proto.h>
 #include <asm/smp.h>
 #include <asm/setup.h>
-#include <asm/setup_arch.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/kdebug.h>
 #include <asm/e820.h>
-#include <asm/bios_ebda.h>
 #include <asm/trampoline.h>
+#include <asm/bios_ebda.h>
 
 #ifndef CONFIG_XEN
 static void __init zap_identity_mappings(void)
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/head_32-xen.S	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/head_32-xen.S	2009-11-06 10:52:22.000000000 +0100
@@ -30,7 +30,7 @@
 #define X86_CAPABILITY	new_cpu_data+CPUINFO_x86_capability
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
-.section .text.head,"ax",@progbits
+__HEAD
 #define VIRT_ENTRY_OFFSET 0x0
 .org VIRT_ENTRY_OFFSET
 ENTRY(startup_32)
@@ -69,7 +69,6 @@ ENTRY(startup_32)
 	 */
 	movl $per_cpu__gdt_page,%eax
 	movl $per_cpu__stack_canary,%ecx
-	subl $20, %ecx
 	movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
 	shrl $16, %ecx
 	movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -122,7 +121,7 @@ ENTRY(hypercall_page)
 /*
  * BSS section
  */
-.section ".bss.page_aligned","wa"
+__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE_asm
 ENTRY(swapper_pg_fixmap)
 	.fill 1024,4,0
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/head_64-xen.S	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/head_64-xen.S	2009-11-06 10:52:22.000000000 +0100
@@ -23,7 +23,7 @@
 #include <asm/percpu.h>
 #include <xen/interface/elfnote.h>
 
-	.section .text.head, "ax", @progbits
+	__HEAD
 	.code64
 	.globl startup_64
 startup_64:
@@ -51,7 +51,7 @@ startup_64:
 
 #define NEXT_PAGE(name) \
 	.balign	PAGE_SIZE; \
-	phys_##name = . - .text.head; \
+	phys_##name = . - .head.text; \
 ENTRY(name)
 
 NEXT_PAGE(init_level4_pgt)
@@ -104,7 +104,7 @@ NEXT_PAGE(hypercall_page)
 
 #undef NEXT_PAGE
 
-	.section .bss.page_aligned, "aw", @nobits
+	__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 ENTRY(empty_zero_page)
 	.skip PAGE_SIZE
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/irq-xen.c	2009-12-18 09:58:56.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/irq-xen.c	2009-12-18 09:59:05.000000000 +0100
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct 
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
-	seq_printf(p, "%*s: ", prec, "CNT");
+	seq_printf(p, "%*s: ", prec, "PMI");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
-	seq_printf(p, "  Performance counter interrupts\n");
+	seq_printf(p, "  Performance monitoring interrupts\n");
 	seq_printf(p, "%*s: ", prec, "PND");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
@@ -112,7 +112,7 @@ static int show_other_interrupts(struct 
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_printf(p, "  Threshold APIC interrupts\n");
 #endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
 	seq_printf(p, "%*s: ", prec, "MCE");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
@@ -212,7 +212,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #ifdef CONFIG_X86_MCE_THRESHOLD
 	sum += irq_stats(cpu)->irq_threshold_count;
 #endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
 	sum += per_cpu(mce_exception_count, cpu);
 	sum += per_cpu(mce_poll_count, cpu);
 #endif
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/irq_32-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/irq_32-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -218,7 +218,6 @@ bool handle_irq(unsigned irq, struct pt_
 void fixup_irqs(void)
 {
 	unsigned int irq;
-	static int warned;
 	struct irq_desc *desc;
 
 	for_each_irq_desc(irq, desc) {
@@ -236,8 +235,8 @@ void fixup_irqs(void)
 		}
 		if (desc->chip->set_affinity)
 			desc->chip->set_affinity(irq, affinity);
-		else if (desc->action && !(warned++))
-			printk("Cannot set affinity for irq %i\n", irq);
+		else if (desc->action)
+			printk_once("Cannot set affinity for irq %i\n", irq);
 	}
 
 #if 0
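
Review note: printk_once() replaces the hand-rolled 'warned' flag. The 2.6.32 helper expands to roughly the following (a sketch, not the verbatim kernel macro):

	#define example_printk_once(fmt, ...)			\
	({							\
		static bool __print_once;			\
								\
		if (!__print_once) {				\
			__print_once = true;			\
			printk(fmt, ##__VA_ARGS__);		\
		}						\
	})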
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/ldt-xen.c	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/ldt-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -70,8 +70,8 @@ static int alloc_ldt(mm_context_t *pc, i
 				    XENFEAT_writable_descriptor_tables);
 		load_LDT(pc);
 #ifdef CONFIG_SMP
-		if (!cpus_equal(current->mm->cpu_vm_mask,
-				cpumask_of_cpu(smp_processor_id())))
+		if (!cpumask_equal(mm_cpumask(current->mm),
+				   cpumask_of(smp_processor_id())))
 			smp_call_function(flush_ldt, current->mm, 1);
 		preempt_enable();
 #endif
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/microcode_core-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/microcode_core-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -97,8 +97,8 @@ static ssize_t microcode_write(struct fi
 {
 	ssize_t ret = -EINVAL;
 
-	if ((len >> PAGE_SHIFT) > num_physpages) {
-		pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+	if ((len >> PAGE_SHIFT) > totalram_pages) {
+		pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
 		return ret;
  	}
 
@@ -121,7 +121,7 @@ static const struct file_operations micr
 static struct miscdevice microcode_dev = {
 	.minor			= MICROCODE_MINOR,
 	.name			= "microcode",
-	.devnode		= "cpu/microcode",
+	.nodename		= "cpu/microcode",
 	.fops			= &microcode_fops,
 };
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/mpparse-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/mpparse-xen.c	2009-11-06 10:52:22.000000000 +0100
@@ -51,6 +51,13 @@ static int __init mpf_checksum(unsigned 
 	return sum & 0xFF;
 }
 
+#ifndef CONFIG_XEN
+int __init default_mpc_apic_id(struct mpc_cpu *m)
+{
+	return m->apicid;
+}
+#endif
+
 static void __init MP_processor_info(struct mpc_cpu *m)
 {
 #ifndef CONFIG_XEN
@@ -62,10 +69,7 @@ static void __init MP_processor_info(str
 		return;
 	}
 
-	if (x86_quirks->mpc_apic_id)
-		apicid = x86_quirks->mpc_apic_id(m);
-	else
-		apicid = m->apicid;
+	apicid = x86_init.mpparse.mpc_apic_id(m);
 
 	if (m->cpuflag & CPU_BOOTPROCESSOR) {
 		bootup_cpu = " (Bootup-CPU)";
@@ -80,16 +84,18 @@ static void __init MP_processor_info(str
 }
 
 #ifdef CONFIG_X86_IO_APIC
-static void __init MP_bus_info(struct mpc_bus *m)
+void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str)
 {
-	char str[7];
 	memcpy(str, m->bustype, 6);
 	str[6] = 0;
+	apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
+}
 
-	if (x86_quirks->mpc_oem_bus_info)
-		x86_quirks->mpc_oem_bus_info(m, str);
-	else
-		apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
+static void __init MP_bus_info(struct mpc_bus *m)
+{
+	char str[7];
+
+	x86_init.mpparse.mpc_oem_bus_info(m, str);
 
 #if MAX_MP_BUSSES < 256
 	if (m->busid >= MAX_MP_BUSSES) {
@@ -106,8 +112,8 @@ static void __init MP_bus_info(struct mp
 		mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
-		if (x86_quirks->mpc_oem_pci_bus)
-			x86_quirks->mpc_oem_pci_bus(m);
+		if (x86_init.mpparse.mpc_oem_pci_bus)
+			x86_init.mpparse.mpc_oem_pci_bus(m);
 
 		clear_bit(m->busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
@@ -301,6 +307,8 @@ static void __init smp_dump_mptable(stru
 			1, mpc, mpc->length, 1);
 }
 
+void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
+
 static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 {
 	char str[16];
@@ -322,16 +330,13 @@ static int __init smp_read_mpc(struct mp
 	if (early)
 		return 1;
 
-	if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) {
-		struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr;
-		x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize);
-	}
+	if (mpc->oemptr)
+		x86_init.mpparse.smp_read_mpc_oem(mpc);
 
 	/*
 	 *      Now process the configuration blocks.
 	 */
-	if (x86_quirks->mpc_record)
-		*x86_quirks->mpc_record = 0;
+	x86_init.mpparse.mpc_record(0);
 
 	while (count < mpc->length) {
 		switch (*mpt) {
@@ -363,8 +368,7 @@ static int __init smp_read_mpc(struct mp
 			count = mpc->length;
 			break;
 		}
-		if (x86_quirks->mpc_record)
-			(*x86_quirks->mpc_record)++;
+		x86_init.mpparse.mpc_record(1);
 	}
 
 #ifdef CONFIG_X86_BIGSMP
@@ -492,11 +496,11 @@ static void __init construct_ioapic_tabl
 		MP_bus_info(&bus);
 	}
 
-	ioapic.type = MP_IOAPIC;
-	ioapic.apicid = 2;
-	ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
-	ioapic.flags = MPC_APIC_USABLE;
-	ioapic.apicaddr = 0xFEC00000;
+	ioapic.type	= MP_IOAPIC;
+	ioapic.apicid	= 2;
+	ioapic.apicver	= mpc_default_type > 4 ? 0x10 : 0x01;
+	ioapic.flags	= MPC_APIC_USABLE;
+	ioapic.apicaddr	= IO_APIC_DEFAULT_PHYS_BASE;
 	MP_ioapic_info(&ioapic);
 
 	/*
@@ -618,7 +622,7 @@ static int __init check_physptr(struct m
 /*
  * Scan the memory blocks for an SMP configuration block.
  */
-static void __init __get_smp_config(unsigned int early)
+void __init default_get_smp_config(unsigned int early)
 {
 	struct mpf_intel *mpf = mpf_found;
 
@@ -635,11 +639,6 @@ static void __init __get_smp_config(unsi
 	if (acpi_lapic && acpi_ioapic)
 		return;
 
-	if (x86_quirks->mach_get_smp_config) {
-		if (x86_quirks->mach_get_smp_config(early))
-			return;
-	}
-
 	printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
 	       mpf->specification);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
@@ -680,16 +679,6 @@ static void __init __get_smp_config(unsi
 	 */
 }
 
-void __init early_get_smp_config(void)
-{
-	__get_smp_config(1);
-}
-
-void __init get_smp_config(void)
-{
-	__get_smp_config(0);
-}
-
 #ifndef CONFIG_XEN
 static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
 {
@@ -761,16 +750,12 @@ static int __init smp_scan_config(unsign
 	return 0;
 }
 
-static void __init __find_smp_config(unsigned int reserve)
+void __init default_find_smp_config(unsigned int reserve)
 {
 #ifndef CONFIG_XEN
 	unsigned int address;
 #endif
 
-	if (x86_quirks->mach_find_smp_config) {
-		if (x86_quirks->mach_find_smp_config(reserve))
-			return;
-	}
 	/*
 	 * FIXME: Linux assumes you have 640K of base ram..
 	 * this continues the error...
@@ -807,16 +792,6 @@ static void __init __find_smp_config(uns
 #endif
 }
 
-void __init early_find_smp_config(void)
-{
-	__find_smp_config(0);
-}
-
-void __init find_smp_config(void)
-{
-	__find_smp_config(1);
-}
-
 #ifdef CONFIG_X86_IO_APIC
 static u8 __initdata irq_used[MAX_IRQ_SOURCES];
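
Review note: the pattern throughout this file is that x86_quirks callbacks guarded by NULL checks become unconditional calls through x86_init.mpparse, backed by the default_* implementations. A sketch of how a subarchitecture would then override one hook at early boot (the quirk itself is hypothetical):

	#include <asm/x86_init.h>
	#include <asm/mpspec_def.h>

	static int __init example_mpc_apic_id(struct mpc_cpu *m)
	{
		/* pretend this platform has to mangle the reported id */
		return m->apicid & 0x0f;
	}

	static void __init example_subarch_setup(void)
	{
		x86_init.mpparse.mpc_apic_id = example_mpc_apic_id;
	}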
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/pci-dma-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/pci-dma-xen.c	2009-11-18 14:54:16.000000000 +0100
@@ -3,6 +3,7 @@
 #include <linux/dmar.h>
 #include <linux/bootmem.h>
 #include <linux/pci.h>
+#include <linux/kmemleak.h>
 
 #include <asm/proto.h>
 #include <asm/dma.h>
@@ -32,17 +33,22 @@ int no_iommu __read_mostly;
 /* Set this to 1 if there is a HW IOMMU in the system */
 int iommu_detected __read_mostly = 0;
 
-int iommu_pass_through;
+/*
+ * This variable becomes 1 if iommu=pt is passed on the kernel command line.
+ * If this variable is 1, IOMMU implementations do no DMA translation for
+ * devices and allow every device to access the whole physical memory. This is
+ * useful if a user wants to use an IOMMU only for KVM device assignment to
+ * guests and not for driver dma translation.
+ */
+int iommu_pass_through __read_mostly;
 
 dma_addr_t bad_dma_address __read_mostly = 0;
 EXPORT_SYMBOL(bad_dma_address);
 
-/* Dummy device used for NULL arguments (normally ISA). Better would
-   be probably a smaller DMA mask, but this is bug-to-bug compatible
-   to older i386. */
+/* Dummy device used for NULL arguments (normally ISA). */
 struct device x86_dma_fallback_dev = {
 	.init_name = "fallback device",
-	.coherent_dma_mask = DMA_BIT_MASK(32),
+	.coherent_dma_mask = ISA_DMA_BIT_MASK,
 	.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
 };
 EXPORT_SYMBOL(x86_dma_fallback_dev);
@@ -88,6 +94,11 @@ void __init dma32_reserve_bootmem(void)
 	size = roundup(dma32_bootmem_size, align);
 	dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
 				 512ULL<<20);
+	/*
+	 * Kmemleak should not scan this block as it may not be mapped via the
+	 * kernel direct mapping.
+	 */
+	kmemleak_ignore(dma32_bootmem_ptr);
 	if (dma32_bootmem_ptr)
 		dma32_bootmem_size = size;
 	else
@@ -178,7 +189,7 @@ again:
 
 #ifndef CONFIG_XEN
 	addr = page_to_phys(page);
-	if (!is_buffer_dma_capable(dma_mask, addr, size)) {
+	if (addr + size > dma_mask) {
 		__free_pages(page, order);
 
 		if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
@@ -266,10 +277,8 @@ static __init int iommu_setup(char *p)
 		if (!strncmp(p, "soft", 4))
 			swiotlb = 1;
 #endif
-		if (!strncmp(p, "pt", 2)) {
+		if (!strncmp(p, "pt", 2))
 			iommu_pass_through = 1;
-			return 1;
-		}
 
 		gart_parse_options(p);
 
@@ -381,7 +390,7 @@ void pci_iommu_shutdown(void)
 	amd_iommu_shutdown();
 }
 /* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
+rootfs_initcall(pci_iommu_init);
 
 #ifdef CONFIG_PCI
 /* Many VIA bridges seem to corrupt data for DAC. Disable it here */
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/pci-nommu-xen.c	2010-01-27 14:45:15.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/pci-nommu-xen.c	2010-01-27 14:49:11.000000000 +0100
@@ -36,7 +36,7 @@ gnttab_map_sg(struct device *hwdev, stru
 		sg->dma_address =
 			gnttab_dma_map_page(sg_page(sg)) + sg->offset;
 		sg->dma_length  = sg->length;
-		IOMMU_BUG_ON(address_needs_mapping(
+		IOMMU_BUG_ON(!dma_capable(
 			hwdev, sg->dma_address, sg->length));
 		IOMMU_BUG_ON(range_straddles_page_boundary(
 			page_to_pseudophys(sg_page(sg)) + sg->offset,
@@ -69,7 +69,7 @@ gnttab_map_page(struct device *dev, stru
 	dma = gnttab_dma_map_page(page) + offset;
 	IOMMU_BUG_ON(range_straddles_page_boundary(page_to_pseudophys(page) +
 						   offset, size));
-	IOMMU_BUG_ON(address_needs_mapping(dev, dma, size));
+	IOMMU_BUG_ON(!dma_capable(dev, dma, size));
 
 	return dma;
 }
@@ -81,19 +81,36 @@ gnttab_unmap_page(struct device *dev, dm
 	gnttab_dma_unmap_page(dma_addr);
 }
 
+static void nommu_sync_single_for_device(struct device *dev,
+			dma_addr_t addr, size_t size,
+			enum dma_data_direction dir)
+{
+	flush_write_buffers();
+}
+
+
+static void nommu_sync_sg_for_device(struct device *dev,
+			struct scatterlist *sg, int nelems,
+			enum dma_data_direction dir)
+{
+	flush_write_buffers();
+}
+
 static int nommu_dma_supported(struct device *hwdev, u64 mask)
 {
 	return 1;
 }
 
 struct dma_map_ops nommu_dma_ops = {
-	.alloc_coherent	= dma_generic_alloc_coherent,
-	.free_coherent	= dma_generic_free_coherent,
-	.map_page	= gnttab_map_page,
-	.unmap_page	= gnttab_unmap_page,
-	.map_sg		= gnttab_map_sg,
-	.unmap_sg	= gnttab_unmap_sg,
-	.dma_supported	= nommu_dma_supported,
+	.alloc_coherent		= dma_generic_alloc_coherent,
+	.free_coherent		= dma_generic_free_coherent,
+	.map_page		= gnttab_map_page,
+	.unmap_page		= gnttab_unmap_page,
+	.map_sg			= gnttab_map_sg,
+	.unmap_sg		= gnttab_unmap_sg,
+	.sync_single_for_device = nommu_sync_single_for_device,
+	.sync_sg_for_device	= nommu_sync_sg_for_device,
+	.dma_supported		= nommu_dma_supported,
 };
 
 void __init no_iommu_init(void)
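
Review note: dma_capable() replaces the old address_needs_mapping()/is_buffer_dma_capable() helpers here and in pci-dma-xen.c above. The generic 2.6.32 helper amounts to (sketch):

	static inline bool example_dma_capable(struct device *dev,
					       dma_addr_t addr, size_t size)
	{
		if (!dev->dma_mask)
			return false;
		/* the whole buffer must fit below the device's DMA mask */
		return addr + size <= *dev->dma_mask;
	}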
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/process-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/process-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -9,7 +9,7 @@
 #include <linux/pm.h>
 #include <linux/clockchips.h>
 #include <linux/random.h>
-#include <trace/power.h>
+#include <trace/events/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -26,9 +26,6 @@ EXPORT_SYMBOL(idle_nomwait);
 
 struct kmem_cache *task_xstate_cachep;
 
-DEFINE_TRACE(power_start);
-DEFINE_TRACE(power_end);
-
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	*dst = *src;
@@ -285,9 +282,7 @@ static inline int hlt_use_halt(void)
  */
 void xen_idle(void)
 {
-	struct power_trace it;
-
-	trace_power_start(&it, POWER_CSTATE, 1);
+	trace_power_start(POWER_CSTATE, 1);
 	current_thread_info()->status &= ~TS_POLLING;
 	/*
 	 * TS_POLLING-cleared state must be visible before we
@@ -300,7 +295,6 @@ void xen_idle(void)
 	else
 		local_irq_enable();
 	current_thread_info()->status |= TS_POLLING;
-	trace_power_end(&it);
 }
 #ifdef CONFIG_APM_MODULE
 EXPORT_SYMBOL(default_idle);
@@ -354,9 +348,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
-	struct power_trace it;
-
-	trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
+	trace_power_start(POWER_CSTATE, (ax>>4)+1);
 	if (!need_resched()) {
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
@@ -366,15 +358,13 @@ void mwait_idle_with_hints(unsigned long
 		if (!need_resched())
 			__mwait(ax, cx);
 	}
-	trace_power_end(&it);
 }
 
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
-	struct power_trace it;
 	if (!need_resched()) {
-		trace_power_start(&it, POWER_CSTATE, 1);
+		trace_power_start(POWER_CSTATE, 1);
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
@@ -384,7 +374,6 @@ static void mwait_idle(void)
 			__sti_mwait(0, 0);
 		else
 			local_irq_enable();
-		trace_power_end(&it);
 	} else
 		local_irq_enable();
 }
@@ -397,13 +386,11 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
-	struct power_trace it;
-
-	trace_power_start(&it, POWER_CSTATE, 0);
+	trace_power_start(POWER_CSTATE, 0);
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
-	trace_power_end(&it);
+	trace_power_end(0);
 }
 
 #ifndef CONFIG_XEN
@@ -556,10 +543,8 @@ void __init init_c1e_mask(void)
 {
 #ifndef CONFIG_XEN
 	/* If we're using c1e_idle, we need to allocate c1e_mask. */
-	if (pm_idle == c1e_idle) {
-		alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
-		cpumask_clear(c1e_mask);
-	}
+	if (pm_idle == c1e_idle)
+		zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
 #endif
 }
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/process_32-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/process_32-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -66,9 +66,6 @@
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
 
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
 /*
  * Return saved PC of a blocked thread.
  */
@@ -360,6 +357,7 @@ __switch_to(struct task_struct *prev_p, 
 #ifndef CONFIG_X86_NO_TSS
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 #endif
+	bool preload_fpu;
 #if CONFIG_XEN_COMPAT > 0x030002
 	struct physdev_set_iopl iopl_op;
 	struct physdev_set_iobitmap iobmp_op;
@@ -373,15 +371,24 @@ __switch_to(struct task_struct *prev_p, 
 	/* XEN NOTE: FS/GS saved in switch_mm(), not here. */
 
 	/*
+	 * If the task has used the FPU in the last 5 timeslices, just do a
+	 * full restore of the math state immediately to avoid the trap; the
+	 * chances of needing the FPU soon are obviously high now.
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+
+	/*
 	 * This is basically '__unlazy_fpu', except that we queue a
 	 * multicall to indicate FPU task switch, rather than
 	 * synchronously trapping to Xen.
 	 */
 	if (task_thread_info(prev_p)->status & TS_USEDFPU) {
 		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
-		mcl->op      = __HYPERVISOR_fpu_taskswitch;
-		mcl->args[0] = 1;
-		mcl++;
+		if (!preload_fpu) {
+			mcl->op      = __HYPERVISOR_fpu_taskswitch;
+			mcl->args[0] = 1;
+			mcl++;
+		}
 	}
 #if 0 /* lazy fpu sanity check */
 	else BUG_ON(!(read_cr0() & 8));
@@ -427,6 +434,14 @@ __switch_to(struct task_struct *prev_p, 
 		mcl++;
 	}
 
+	/* If we're going to preload the fpu context, make sure clts
+	   is run while we're batching the cpu state updates. */
+	if (preload_fpu) {
+		mcl->op      = __HYPERVISOR_fpu_taskswitch;
+		mcl->args[0] = 0;
+		mcl++;
+	}
+
 	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
 		set_xen_guest_handle(iobmp_op.bitmap,
 				     (char *)next->io_bitmap_ptr);
@@ -451,7 +466,7 @@ __switch_to(struct task_struct *prev_p, 
 		BUG();
 
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -470,15 +485,8 @@ __switch_to(struct task_struct *prev_p, 
 	 */
 	arch_end_context_switch(next_p);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
-	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 
 	/*
 	 * Restore %gs if needed (which is common)
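
Review note: the preload path queues the CR0.TS clear into the same multicall batch as the rest of the context-switch state, so the FPU restore after arch_end_context_switch() cannot trap. A sketch of the batching idiom, assuming the Xen hypercall wrappers used elsewhere in these patches:

	#include <xen/interface/xen.h>

	static void example_batched_clts(void)
	{
		multicall_entry_t mcl[1];

		mcl[0].op      = __HYPERVISOR_fpu_taskswitch;
		mcl[0].args[0] = 0;	/* 0 clears CR0.TS, 1 sets it */
		if (HYPERVISOR_multicall(mcl, 1))
			BUG();
	}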
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/process_64-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/process_64-xen.c	2010-03-17 14:37:05.000000000 +0100
@@ -64,9 +64,6 @@
 
 asmlinkage extern void ret_from_fork(void);
 
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
 DEFINE_PER_CPU(unsigned long, old_rsp);
 static DEFINE_PER_CPU(unsigned char, is_idle);
 
@@ -402,6 +399,7 @@ __switch_to(struct task_struct *prev_p, 
 #ifndef CONFIG_X86_NO_TSS
 	struct tss_struct *tss = &per_cpu(init_tss, cpu);
 #endif
+	bool preload_fpu;
 #if CONFIG_XEN_COMPAT > 0x030002
 	struct physdev_set_iopl iopl_op;
 	struct physdev_set_iobitmap iobmp_op;
@@ -412,8 +410,15 @@ __switch_to(struct task_struct *prev_p, 
 #endif
 	multicall_entry_t _mcl[8], *mcl = _mcl;
 
+	/*
+	 * If the task has used the FPU in the last 5 timeslices, just do a
+	 * full restore of the math state immediately to avoid the trap; the
+	 * chances of needing the FPU soon are obviously high now.
+	 */
+	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+
 	/* we're going to use this soon, after a few expensive things */
-	if (next_p->fpu_counter > 5)
+	if (preload_fpu)
 		prefetch(next->xstate);
 
 	/*
@@ -425,12 +430,21 @@ __switch_to(struct task_struct *prev_p, 
 	 */
 	if (task_thread_info(prev_p)->status & TS_USEDFPU) {
 		__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
-		mcl->op      = __HYPERVISOR_fpu_taskswitch;
-		mcl->args[0] = 1;
-		mcl++;
+		if (!preload_fpu) {
+			mcl->op      = __HYPERVISOR_fpu_taskswitch;
+			mcl->args[0] = 1;
+			mcl++;
+		}
 	} else
 		prev_p->fpu_counter = 0;
 
+	/* Make sure cpu is ready for new context */
+	if (preload_fpu) {
+		mcl->op      = __HYPERVISOR_fpu_taskswitch;
+		mcl->args[0] = 0;
+		mcl++;
+	}
+
 	/*
 	 * Reload sp0.
 	 * This is load_sp0(tss, next) with a multicall.
@@ -550,15 +564,12 @@ __switch_to(struct task_struct *prev_p, 
 		     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
 		__switch_to_xtra(prev_p, next_p);
 
-	/* If the task has used fpu the last 5 timeslices, just do a full
-	 * restore of the math state immediately to avoid the trap; the
-	 * chances of needing FPU soon are obviously high now
-	 *
-	 * tsk_used_math() checks prevent calling math_state_restore(),
-	 * which can sleep in the case of !tsk_used_math()
+	/*
+	 * Preload the FPU context, now that we've determined that the
+	 * task is likely to be using it.
 	 */
-	if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
-		math_state_restore();
+	if (preload_fpu)
+		__math_state_restore();
 	return prev_p;
 }
 
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/quirks-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/quirks-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -509,7 +509,7 @@ static void __init quirk_amd_nb_node(str
 
 	pci_read_config_dword(nb_ht, 0x60, &val);
 	set_dev_node(&dev->dev, val & 7);
-	pci_dev_put(dev);
+	pci_dev_put(nb_ht);
 }
 
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
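
Review note: this one-liner is a real reference-count fix: quirk_amd_nb_node() looks up the neighbouring northbridge HT function (nb_ht) and must drop that reference, not the one on the device being fixed up. The general get/put pairing, for reference:

	#include <linux/pci.h>

	static void example_query_k8_nb(void)
	{
		struct pci_dev *nb = pci_get_device(PCI_VENDOR_ID_AMD,
						    PCI_DEVICE_ID_AMD_K8_NB,
						    NULL);
		u32 val;

		if (!nb)
			return;
		pci_read_config_dword(nb, 0x60, &val);
		pci_dev_put(nb);	/* drop what pci_get_device() took */
	}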
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/rtc.c	2009-11-06 10:51:25.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/rtc.c	2009-11-06 10:52:23.000000000 +0100
@@ -189,8 +189,10 @@ void read_persistent_clock(struct timesp
 	unsigned long retval, flags;
 
 #ifdef CONFIG_XEN
-	if (!is_initial_xendomain())
-		return xen_read_persistent_clock();
+	if (!is_initial_xendomain()) {
+		xen_read_persistent_clock(ts);
+		return;
+	}
 #endif
 	spin_lock_irqsave(&rtc_lock, flags);
 	retval = x86_platform.get_wallclock();
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/setup-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/setup-xen.c	2009-11-18 14:54:16.000000000 +0100
@@ -27,6 +27,7 @@
 #include <linux/screen_info.h>
 #include <linux/ioport.h>
 #include <linux/acpi.h>
+#include <linux/sfi.h>
 #include <linux/apm_bios.h>
 #include <linux/initrd.h>
 #include <linux/bootmem.h>
@@ -66,6 +67,7 @@
 
 #include <linux/percpu.h>
 #include <linux/crash_dump.h>
+#include <linux/tboot.h>
 
 #include <video/edid.h>
 
@@ -138,10 +140,6 @@ start_info_t *xen_start_info;
 EXPORT_SYMBOL(xen_start_info);
 #endif
 
-#ifndef ARCH_SETUP
-#define ARCH_SETUP
-#endif
-
 /*
  * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
  * The direct mapping extends to max_pfn_mapped, so that we can directly access
@@ -164,9 +162,9 @@ int default_cpu_present_to_apicid(int mp
 	return __default_cpu_present_to_apicid(mps_cpu);
 }
 
-int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+int default_check_phys_apicid_present(int phys_apicid)
 {
-	return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
+	return __default_check_phys_apicid_present(phys_apicid);
 }
 #endif
 
@@ -203,13 +201,6 @@ static struct resource bss_resource = {
 
 
 #ifdef CONFIG_X86_32
-static struct resource video_ram_resource = {
-	.name	= "Video RAM area",
-	.start	= 0xa0000,
-	.end	= 0xbffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
 /* cpu data as detected by the assembly code in head.S */
 struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
 /* common cpu data for all cpus */
@@ -670,7 +661,7 @@ static struct resource standard_io_resou
 		.flags = IORESOURCE_BUSY | IORESOURCE_IO }
 };
 
-static void __init reserve_standard_io_resources(void)
+void __init reserve_standard_io_resources(void)
 {
 	int i;
 
@@ -706,10 +697,6 @@ static int __init setup_elfcorehdr(char 
 early_param("elfcorehdr", setup_elfcorehdr);
 #endif
 
-static struct x86_quirks default_x86_quirks __initdata;
-
-struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
-
 #ifdef CONFIG_X86_RESERVE_LOW_64K
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
@@ -742,6 +729,13 @@ static struct dmi_system_id __initdata b
 		},
 	},
 	{
+		.callback = dmi_low_memory_corruption,
+		.ident = "Phoenix/MSC BIOS",
+		.matches = {
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
+		},
+	},
+	{
 	/*
 	 * AMI BIOS with low memory corruption was found on Intel DG45ID board.
 	 * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
@@ -865,7 +859,7 @@ void __init setup_arch(char **cmdline_p)
 	copy_edid();
 #endif /* CONFIG_XEN */
 
-	ARCH_SETUP
+	x86_init.oem.arch_setup();
 
 	setup_memory_map();
 	parse_setup_data();
@@ -906,6 +900,16 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Must call this twice: once just to detect whether the hardware
+	 * doesn't support NX (so that the early EHCI debug console setup
+	 * can safely call set_fixmap()), and then again after parsing early
+	 * parameters to honor the respective command line option.
+	 */
+	check_efer();
+#endif
+
 	parse_early_param();
 
 #ifdef CONFIG_X86_64
@@ -945,12 +949,9 @@ void __init setup_arch(char **cmdline_p)
 	 * VMware detection requires dmi to be available, so this
 	 * needs to be done after dmi_scan_machine, for the BP.
 	 */
-	init_hypervisor(&boot_cpu_data);
+	init_hypervisor_platform();
 
-#ifdef CONFIG_X86_32
-	if (is_initial_xendomain())
-		probe_roms();
-#endif
+	x86_init.resources.probe_roms();
 
 #ifndef CONFIG_XEN
 	/* after parse_early_param, so could debug it */
@@ -1103,10 +1104,11 @@ void __init setup_arch(char **cmdline_p)
 	kvmclock_init();
 #endif
 
-	xen_pagetable_setup_start(swapper_pg_dir);
+	x86_init.paging.pagetable_setup_start(swapper_pg_dir);
 	paging_init();
-	xen_pagetable_setup_done(swapper_pg_dir);
-	paravirt_post_allocator_init();
+	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+
+	tboot_probe();
 
 #ifdef CONFIG_X86_64
 	map_vsyscall();
@@ -1197,13 +1199,13 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	acpi_boot_init();
 
-#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
+	sfi_init();
+
 	/*
 	 * get boot-time SMP configuration:
 	 */
 	if (smp_found_config)
 		get_smp_config();
-#endif
 
 	prefill_possible_map();
 
@@ -1227,11 +1229,7 @@ void __init setup_arch(char **cmdline_p)
 		e820_reserve_resources();
 #endif
 
-#ifdef CONFIG_X86_32
-	if (is_initial_xendomain())
-		request_resource(&iomem_resource, &video_ram_resource);
-#endif
-	reserve_standard_io_resources();
+	x86_init.resources.reserve_resources();
 
 #ifndef CONFIG_XEN
 	e820_setup_gap();
@@ -1261,80 +1259,25 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #endif
 #endif /* CONFIG_XEN */
+	x86_init.oem.banner();
 }
 
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-
-/**
- * x86_quirk_intr_init - post gate setup interrupt initialisation
- *
- * Description:
- *	Fill in any interrupts that may have been left out by the general
- *	init_IRQ() routine.  interrupts having to do with the machine rather
- *	than the devices on the I/O bus (like APIC interrupts in intel MP
- *	systems) are started here.
- **/
-void __init x86_quirk_intr_init(void)
-{
-	if (x86_quirks->arch_intr_init) {
-		if (x86_quirks->arch_intr_init())
-			return;
-	}
-}
-
-/**
- * x86_quirk_trap_init - initialise system specific traps
- *
- * Description:
- *	Called as the final act of trap_init().  Used in VISWS to initialise
- *	the various board specific APIC traps.
- **/
-void __init x86_quirk_trap_init(void)
-{
-	if (x86_quirks->arch_trap_init) {
-		if (x86_quirks->arch_trap_init())
-			return;
-	}
-}
+#ifdef CONFIG_X86_32
 
-static struct irqaction irq0  = {
-	.handler = timer_interrupt,
-	.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
-	.name = "timer"
+static struct resource video_ram_resource = {
+	.name	= "Video RAM area",
+	.start	= 0xa0000,
+	.end	= 0xbffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
 };
 
-/**
- * x86_quirk_pre_time_init - do any specific initialisations before.
- *
- **/
-void __init x86_quirk_pre_time_init(void)
+void __init i386_reserve_resources(void)
 {
-	if (x86_quirks->arch_pre_time_init)
-		x86_quirks->arch_pre_time_init();
+	if (is_initial_xendomain())
+		request_resource(&iomem_resource, &video_ram_resource);
+	reserve_standard_io_resources();
 }
 
-/**
- * x86_quirk_time_init - do any specific initialisations for the system timer.
- *
- * Description:
- *	Must plug the system timer interrupt source at HZ into the IRQ listed
- *	in irq_vectors.h:TIMER_IRQ
- **/
-void __init x86_quirk_time_init(void)
-{
-	if (x86_quirks->arch_time_init) {
-		/*
-		 * A nonzero return code does not mean failure, it means
-		 * that the architecture quirk does not want any
-		 * generic (timer) setup to be performed after this:
-		 */
-		if (x86_quirks->arch_time_init())
-			return;
-	}
-
-	irq0.mask = cpumask_of_cpu(0);
-	setup_irq(0, &irq0);
-}
 #endif /* CONFIG_X86_32 */
 
 #ifdef CONFIG_XEN
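
The x86_quirks indirection removed above is replaced by calls through the
x86_init ops table (added as arch/x86/kernel/x86_init-xen.c later in this
patch). A minimal sketch of the resulting pattern, assuming the upstream
2.6.32 wiring in which the 32-bit boot code installs i386_reserve_resources()
early; the install site shown here is illustrative:

	/* boot code swaps in the platform-specific hook once ... */
	void __init i386_start_kernel(void)
	{
		x86_init.resources.reserve_resources = i386_reserve_resources;
		/* ... */
	}

	/* ... and setup_arch() then calls through the table
	 * unconditionally, with no #ifdef at the call site */
	x86_init.resources.reserve_resources();
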
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/sfi.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/arch/x86/kernel/sfi.c	2009-11-06 10:52:23.000000000 +0100
@@ -31,7 +31,7 @@
 #include <asm/setup.h>
 #include <asm/apic.h>
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
 static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 
 void __init mp_sfi_register_lapic_address(unsigned long address)
@@ -99,9 +99,12 @@ static int __init sfi_parse_ioapic(struc
 		pentry++;
 	}
 
+#ifndef CONFIG_XEN
 	WARN(pic_mode, KERN_WARNING
 		"SFI: pic_mode shouldn't be 1 when IOAPIC table is present\n");
 	pic_mode = 0;
+#endif
+
 	return 0;
 }
 #endif /* CONFIG_X86_IO_APIC */
@@ -111,7 +114,7 @@ static int __init sfi_parse_ioapic(struc
  */
 int __init sfi_platform_init(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
 	mp_sfi_register_lapic_address(sfi_lapic_addr);
 	sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
 #endif
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/time-xen.c	2010-02-04 09:42:47.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/time-xen.c	2010-02-09 17:07:46.000000000 +0100
@@ -1,31 +1,12 @@
 /*
- *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
+ *  Copyright (c) 1991,1992,1995  Linus Torvalds
+ *  Copyright (c) 1994  Alan Modra
+ *  Copyright (c) 1995  Markus Kuhn
+ *  Copyright (c) 1996  Ingo Molnar
+ *  Copyright (c) 1998  Andrea Arcangeli
+ *  Copyright (c) 2002,2006  Vojtech Pavlik
+ *  Copyright (c) 2003  Andi Kleen
  *
- * This file contains the PC-specific time handling details:
- * reading the RTC at bootup, etc..
- * 1994-07-02    Alan Modra
- *	fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
- * 1995-03-26    Markus Kuhn
- *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
- *      precision CMOS clock update
- * 1996-05-03    Ingo Molnar
- *      fixed time warps in do_[slow|fast]_gettimeoffset()
- * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
- *		"A Kernel Model for Precision Timekeeping" by Dave Mills
- * 1998-09-05    (Various)
- *	More robust do_fast_gettimeoffset() algorithm implemented
- *	(works with APM, Cyrix 6x86MX and Centaur C6),
- *	monotonic gettimeofday() with fast_get_timeoffset(),
- *	drift-proof precision TSC calibration on boot
- *	(C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
- *	Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
- *	ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
- * 1998-12-16    Andrea Arcangeli
- *	Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
- *	because was not accounting lost_ticks.
- * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
- *	Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
- *	serialize accesses to xtime/lost_ticks).
  */
 
 #include <linux/init.h>
@@ -39,6 +20,7 @@
 #include <linux/clocksource.h>
 #include <linux/sysdev.h>
 
+#include <asm/vsyscall.h>
 #include <asm/delay.h>
 #include <asm/time.h>
 #include <asm/timer.h>
@@ -52,7 +34,6 @@ DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
 #ifdef CONFIG_X86_64
-#include <asm/vsyscall.h>
 volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
 #endif
 
@@ -415,38 +396,33 @@ unsigned long profile_pc(struct pt_regs 
 {
 	unsigned long pc = instruction_pointer(regs);
 
-#if defined(CONFIG_SMP) || defined(__x86_64__)
 	if (!user_mode_vm(regs) && in_lock_functions(pc)) {
-# ifdef CONFIG_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
 		return *(unsigned long *)(regs->bp + sizeof(long));
-# else
-#  ifdef __i386__
-		unsigned long *sp = (unsigned long *)&regs->sp;
-#  else
-		unsigned long *sp = (unsigned long *)regs->sp;
-#  endif
-
-		/* Return address is either directly at stack pointer
-		   or above a saved flags. Eflags has bits 22-31 zero,
-		   kernel addresses don't. */
+#else
+		unsigned long *sp =
+			(unsigned long *)kernel_stack_pointer(regs);
+
+		/*
+		 * The return address is either directly at the stack
+		 * pointer or above a saved flags word. EFLAGS has bits
+		 * 22-31 zero, kernel addresses don't.
+		 */
 		if (sp[0] >> 22)
 			return sp[0];
 		if (sp[1] >> 22)
 			return sp[1];
-# endif
-	}
 #endif
+	}
 
 	return pc;
 }
 EXPORT_SYMBOL(profile_pc);
 
 /*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
+ * Default timer interrupt handler
  */
-irqreturn_t timer_interrupt(int irq, void *dev_id)
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
 {
 	s64 delta, delta_cpu, stolen, blocked;
 	unsigned int i, cpu = smp_processor_id();
@@ -566,8 +542,7 @@ irqreturn_t timer_interrupt(int irq, voi
 
 	/* Local timer processing (see update_process_times()). */
 	run_local_timers();
-	if (rcu_pending(cpu))
-		rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
+	rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
 	printk_tick();
 	scheduler_tick();
 	run_posix_cpu_timers(current);
@@ -667,7 +642,7 @@ static void init_missing_ticks_accountin
 		runstate->time[RUNSTATE_offline];
 }
 
-unsigned long xen_read_persistent_clock(void)
+void xen_read_persistent_clock(struct timespec *ts)
 {
 	const shared_info_t *s = HYPERVISOR_shared_info;
 	u32 version, sec, nsec;
@@ -684,7 +659,8 @@ unsigned long xen_read_persistent_clock(
 	delta = local_clock() + (u64)sec * NSEC_PER_SEC + nsec;
 	do_div(delta, NSEC_PER_SEC);
 
-	return delta;
+	ts->tv_sec = delta;
+	ts->tv_nsec = 0;
 }
 
 int xen_update_persistent_clock(void)
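
The new xen_read_persistent_clock() signature matches the generic 2.6.32
read_persistent_clock(struct timespec *) interface, which fills in a
timespec instead of returning a bare seconds count. A hedged sketch of the
consumer side, simplified from the generic timekeeping_init() path:

	struct timespec now;

	read_persistent_clock(&now);	/* ends up in xen_read_persistent_clock() */
	/* now.tv_sec holds wall-clock seconds; this implementation
	 * always leaves now.tv_nsec at 0 */
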
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/traps-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/traps-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -14,7 +14,6 @@
 #include <linux/spinlock.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
-#include <linux/utsname.h>
 #include <linux/kdebug.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -59,12 +58,12 @@
 #include <asm/mach_traps.h>
 
 #ifdef CONFIG_X86_64
+#include <asm/x86_init.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #else
 #include <asm/processor-flags.h>
 #include <asm/setup.h>
-#include <asm/traps.h>
 
 asmlinkage int system_call(void);
 
@@ -74,11 +73,9 @@ char ignore_fpu_irq;
 #ifndef CONFIG_X86_NO_IDT
 /*
  * The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.. We have a special link segment
- * for this.
+ * F0 0F bug workaround.
  */
-gate_desc idt_table[256]
-	__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
+gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
 #endif
 #endif
 
@@ -780,27 +777,6 @@ do_spurious_interrupt_bug(struct pt_regs
 #endif
 }
 
-#ifdef CONFIG_X86_32
-unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
-{
-	struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
-	unsigned long base = (kesp - uesp) & -THREAD_SIZE;
-	unsigned long new_kesp = kesp - base;
-	unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
-	__u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
-
-	/* Set up base for espfix segment */
-	desc &= 0x00f0ff0000000000ULL;
-	desc |=	((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
-		((((__u64)base) << 32) & 0xff00000000000000ULL) |
-		((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
-		(lim_pages & 0xffff);
-	*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
-
-	return new_kesp;
-}
-#endif
-
 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }
@@ -811,6 +787,28 @@ asmlinkage void __attribute__((weak)) sm
 #endif /* CONFIG_XEN */
 
 /*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use.  Used during context switch.
+ */
+void __math_state_restore(void)
+{
+	struct thread_info *thread = current_thread_info();
+	struct task_struct *tsk = thread->task;
+
+	/*
+	 * Paranoid restore. Send a SIGSEGV if we fail to restore the state.
+	 */
+	if (unlikely(restore_fpu_checking(tsk))) {
+		stts();
+		force_sig(SIGSEGV, tsk);
+		return;
+	}
+
+	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
+	tsk->fpu_counter++;
+}
+
+/*
  * 'math_state_restore()' saves the current math information in the
  * old math state array, and gets the new ones from the current task
  *
@@ -841,17 +839,7 @@ asmlinkage void math_state_restore(void)
 	}
 
 	/* NB. 'clts' is done for us by Xen during virtual trap. */
-	/*
-	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
-	 */
-	if (unlikely(restore_fpu_checking(tsk))) {
-		stts();
-		force_sig(SIGSEGV, tsk);
-		return;
-	}
-
-	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
-	tsk->fpu_counter++;
+	__math_state_restore();
 }
 EXPORT_SYMBOL_GPL(math_state_restore);
 
@@ -967,6 +955,8 @@ void __init trap_init(void)
 	 * Should be a barrier for any external CPU state:
 	 */
 	cpu_init();
+
+	x86_init.irqs.trap_init();
 }
 
 void __cpuinit smp_trap_init(trap_info_t *trap_ctxt)
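
__math_state_restore() factors the checked FPU restore out of
math_state_restore() so that the context-switch path can call it with
cr0.TS already clear, avoiding a device-not-available trap. A simplified
sketch of the intended fast path, following the upstream 2.6.32 FPU
preload heuristic in __switch_to():

	/* if the incoming task has used the FPU on recent timeslices,
	 * preload its state now rather than faulting it in via #NM */
	if (next_p->fpu_counter > 5)
		__math_state_restore();	/* caller already cleared TS */
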
--- sle11sp1-2010-03-29.orig/arch/x86/kernel/vsyscall_64-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/kernel/vsyscall_64-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wa
 	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
 	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
 	vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
+	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
@@ -227,19 +228,11 @@ static long __vsyscall(3) venosys_1(void
 }
 
 #ifdef CONFIG_SYSCTL
-
-static int
-vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
-		       void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-}
-
 static ctl_table kernel_table2[] = {
 	{ .procname = "vsyscall64",
 	  .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
 	  .mode = 0644,
-	  .proc_handler = vsyscall_sysctl_change },
+	  .proc_handler = proc_dointvec },
 	{}
 };
 
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ sle11sp1-2010-03-29/arch/x86/kernel/x86_init-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de>
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+#include <linux/bitmap.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/e820.h>
+#include <asm/time.h>
+#include <asm/irq.h>
+
+void __cpuinit x86_init_noop(void) { }
+void __init x86_init_uint_noop(unsigned int unused) { }
+void __init x86_init_pgd_noop(pgd_t *unused) { }
+
+/*
+ * The platform setup functions are preset with the default functions
+ * for standard PC hardware.
+ */
+struct x86_init_ops x86_init __initdata = {
+
+	.resources = {
+		.probe_roms		= x86_init_noop,
+		.reserve_resources	= reserve_standard_io_resources,
+		.memory_setup		= default_machine_specific_memory_setup,
+	},
+
+	.mpparse = {
+		.mpc_record		= x86_init_uint_noop,
+		.setup_ioapic_ids	= x86_init_noop,
+		.mpc_apic_id		= NULL,
+		.smp_read_mpc_oem	= default_smp_read_mpc_oem,
+		.mpc_oem_bus_info	= default_mpc_oem_bus_info,
+		.find_smp_config	= default_find_smp_config,
+		.get_smp_config		= default_get_smp_config,
+	},
+
+	.irqs = {
+		.pre_vector_init	= NULL,
+		.intr_init		= NULL,
+		.trap_init		= x86_init_noop,
+	},
+
+	.oem = {
+		.arch_setup		= xen_arch_setup,
+		.banner			= x86_init_noop,
+	},
+
+	.paging = {
+		.pagetable_setup_start	= x86_init_pgd_noop,
+		.pagetable_setup_done	= x86_init_pgd_noop,
+	},
+
+	.timers = {
+		.setup_percpu_clockev	= NULL,
+		.tsc_pre_init		= x86_init_noop,
+		.timer_init		= x86_init_noop,
+	},
+};
+
+struct x86_platform_ops x86_platform = {
+	.calibrate_tsc			= NULL,
+	.get_wallclock			= mach_get_cmos_time,
+	.set_wallclock			= mach_set_rtc_mmss,
+};
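
The point of the x86_init/x86_platform tables is that a platform overrides
individual function pointers instead of patching call sites. A hedged
example of the idiom from the upstream 2.6.32 tree (VMware's hypervisor
setup, simplified):

	void __init vmware_platform_setup(void)
	{
		/* replace the default TSC calibration for this platform */
		x86_platform.calibrate_tsc = vmware_get_tsc_khz;
	}
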
--- sle11sp1-2010-03-29.orig/arch/x86/mm/fault-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/fault-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -10,7 +10,7 @@
 #include <linux/bootmem.h>		/* max_low_pfn			*/
 #include <linux/kprobes.h>		/* __kprobes, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
-#include <linux/perf_counter.h>		/* perf_swcounter_event		*/
+#include <linux/perf_event.h>		/* perf_sw_event		*/
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int s
 	info.si_errno	= 0;
 	info.si_code	= si_code;
 	info.si_addr	= (void __user *)address;
+	info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
 
 	force_sig_info(si_signo, &info, tsk);
 }
@@ -293,27 +294,25 @@ check_v8086_mode(struct pt_regs *regs, u
 		tsk->thread.screen_bitmap |= 1 << bit;
 }
 
-static void dump_pagetable(unsigned long address)
+static bool low_pfn(unsigned long pfn)
 {
-	__typeof__(pte_val(__pte(0))) page;
+	return pfn < max_low_pfn;
+}
 
-	page = read_cr3();
-	page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+static void dump_pagetable(unsigned long address)
+{
+	pgd_t *base = __va(read_cr3());
+	pgd_t *pgd = &base[pgd_index(address)];
+	pmd_t *pmd;
+	pte_t *pte;
 
 #ifdef CONFIG_X86_PAE
-	printk("*pdpt = %016Lx ", page);
-	if ((page & _PAGE_PRESENT)
-	    && mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn) {
-		page = mfn_to_pfn(page >> PAGE_SHIFT);
-		page <<= PAGE_SHIFT;
-		page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
-							& (PTRS_PER_PMD - 1)];
-		printk(KERN_CONT "*pde = %016Lx ", page);
-		page &= ~_PAGE_NX;
-	}
-#else
-	printk("*pde = %08lx ", page);
+	printk("*pdpt = %016Lx ", __pgd_val(*pgd));
+	if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
+		goto out;
 #endif
+	pmd = pmd_offset(pud_offset(pgd, address), address);
+	printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)__pmd_val(*pmd));
 
 	/*
 	 * We must not directly access the pte in the highpte
@@ -321,17 +320,12 @@ static void dump_pagetable(unsigned long
 	 * And let's rather not kmap-atomic the pte, just in case
 	 * it's allocated already:
 	 */
-	if ((page & _PAGE_PRESENT)
-	    && mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn
-	    && !(page & _PAGE_PSE)) {
-
-		page = mfn_to_pfn(page >> PAGE_SHIFT);
-		page <<= PAGE_SHIFT;
-		page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
-							& (PTRS_PER_PTE - 1)];
-		printk(KERN_CONT "*pte = %0*Lx ", sizeof(page)*2, (u64)page);
-	}
+	if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+		goto out;
 
+	pte = pte_offset_kernel(pmd, address);
+	printk(KERN_CONT "*pte = %0*Lx ", sizeof(*pte) * 2, (u64)__pte_val(*pte));
+out:
 	printk(KERN_CONT "\n");
 }
 
@@ -460,16 +454,12 @@ static int bad_address(void *p)
 
 static void dump_pagetable(unsigned long address)
 {
-	pgd_t *pgd;
+	pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+	pgd_t *pgd = base + pgd_index(address);
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
-	pgd = (pgd_t *)read_cr3();
-
-	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-
-	pgd += pgd_index(address);
 	if (bad_address(pgd))
 		goto bad;
 
@@ -809,10 +799,12 @@ out_of_memory(struct pt_regs *regs, unsi
 }
 
 static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+	  unsigned int fault)
 {
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
+	int code = BUS_ADRERR;
 
 	up_read(&mm->mmap_sem);
 
@@ -828,7 +820,15 @@ do_sigbus(struct pt_regs *regs, unsigned
 	tsk->thread.error_code	= error_code;
 	tsk->thread.trap_no	= 14;
 
-	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+	if (fault & VM_FAULT_HWPOISON) {
+		printk(KERN_ERR
+	"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+			tsk->comm, tsk->pid, address);
+		code = BUS_MCEERR_AR;
+	}
+#endif
+	force_sig_info_fault(SIGBUS, code, address, tsk);
 }
 
 static noinline void
@@ -838,8 +838,8 @@ mm_fault_error(struct pt_regs *regs, uns
 	if (fault & VM_FAULT_OOM) {
 		out_of_memory(regs, error_code, address);
 	} else {
-		if (fault & VM_FAULT_SIGBUS)
-			do_sigbus(regs, error_code, address);
+		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+			do_sigbus(regs, error_code, address, fault);
 		else
 			BUG();
 	}
@@ -1053,7 +1053,7 @@ do_page_fault(struct pt_regs *regs, unsi
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1150,11 +1150,11 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 
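
The VM_FAULT_HWPOISON path delivers SIGBUS with si_code BUS_MCEERR_AR, and
force_sig_info_fault() above fills si_addr_lsb with the poison granularity.
A hedged sketch of the receiving side, assuming a userspace handler
installed with SA_SIGINFO:

	#include <signal.h>

	static void sigbus_handler(int sig, siginfo_t *si, void *uctx)
	{
		if (si->si_code == BUS_MCEERR_AR) {
			/* hardware memory corruption at si->si_addr; the
			 * poisoned granule is 1UL << si->si_addr_lsb bytes
			 * (one page with the PAGE_SHIFT value set above) */
		}
	}
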
--- sle11sp1-2010-03-29.orig/arch/x86/mm/highmem_32-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/highmem_32-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -24,7 +24,7 @@ void kunmap(struct page *page)
  * no global lock is needed and because the kmap code must perform a global TLB
  * invalidation when the kmap pool wraps.
  *
- * However when holding an atomic kmap is is not legal to sleep, so atomic
+ * However when holding an atomic kmap it is not legal to sleep, so atomic
  * kmaps are appropriate for short, tight code paths only.
  */
 void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
@@ -149,9 +149,7 @@ EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
 EXPORT_SYMBOL(kmap_atomic_prot);
-#ifdef CONFIG_HIGHPTE
 EXPORT_SYMBOL(kmap_atomic_to_page);
-#endif
 EXPORT_SYMBOL(clear_highpage);
 EXPORT_SYMBOL(copy_highpage);
 
--- sle11sp1-2010-03-29.orig/arch/x86/mm/init-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/init-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -36,69 +36,6 @@ extern unsigned long extend_init_mapping
 extern void xen_finish_init_mapping(void);
 #endif
 
-int nx_enabled;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!strncmp(str, "on", 2)) {
-		__supported_pte_mask |= _PAGE_NX;
-		disable_nx = 0;
-	} else if (!strncmp(str, "off", 3)) {
-		disable_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	}
-	return 0;
-}
-early_param("noexec", noexec_setup);
-#endif
-
-#ifdef CONFIG_X86_PAE
-static void __init set_nx(void)
-{
-	unsigned int v[4], l, h;
-
-	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-		if ((v[3] & (1 << 20)) && !disable_nx) {
-			rdmsr(MSR_EFER, l, h);
-			l |= EFER_NX;
-			wrmsr(MSR_EFER, l, h);
-			nx_enabled = 1;
-			__supported_pte_mask |= _PAGE_NX;
-		}
-	}
-}
-#else
-static inline void set_nx(void)
-{
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __cpuinit check_efer(void)
-{
-	unsigned long efer;
-
-	rdmsrl(MSR_EFER, efer);
-	if (!(efer & EFER_NX) || disable_nx)
-		__supported_pte_mask &= ~_PAGE_NX;
-}
-#endif
-
 static void __init find_early_table_space(unsigned long end, int use_pse,
 					  int use_gbpages)
 {
--- sle11sp1-2010-03-29.orig/arch/x86/mm/init_32-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/init_32-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -87,7 +87,7 @@ static pmd_t * __init one_md_table_init(
 #ifdef CONFIG_X86_PAE
 	if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
 		if (after_bootmem)
-			pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+			pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
 		else
 			pmd_table = (pmd_t *)alloc_low_page();
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -124,7 +124,7 @@ static pte_t * __init one_page_table_ini
 #endif
 			if (!page_table)
 				page_table =
-				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+				(pte_t *)alloc_bootmem_pages(PAGE_SIZE);
 		} else
 			page_table = (pte_t *)alloc_low_page();
 
@@ -914,8 +914,6 @@ static void __init test_wp_bit(void)
 	}
 }
 
-static struct kcore_list kcore_mem, kcore_vmalloc;
-
 void __init mem_init(void)
 {
 	int codesize, reservedpages, datasize, initsize;
@@ -949,13 +947,9 @@ void __init mem_init(void)
 	datasize =  (unsigned long) &_edata - (unsigned long) &_etext;
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START);
-
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		num_physpages << (PAGE_SHIFT-10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT-10),
--- sle11sp1-2010-03-29.orig/arch/x86/mm/init_64-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/init_64-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -894,8 +894,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
-			 kcore_modules, kcore_vsyscall;
+static struct kcore_list kcore_vsyscall;
 
 void __init mem_init(void)
 {
@@ -931,17 +930,12 @@ void __init mem_init(void)
 	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
 
 	/* Register memory areas for /proc/kcore */
-	kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
-	kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
-		   VMALLOC_END-VMALLOC_START);
-	kclist_add(&kcore_kernel, &_stext, _end - _stext);
-	kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
 	kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
-				 VSYSCALL_END - VSYSCALL_START);
+			 VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 			 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		max_pfn << (PAGE_SHIFT-10),
 		codesize >> 10,
 		absent_pages << (PAGE_SHIFT-10),
--- sle11sp1-2010-03-29.orig/arch/x86/mm/iomap_32-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/iomap_32-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -22,7 +22,7 @@
 #include <linux/module.h>
 #include <linux/highmem.h>
 
-int is_io_mapping_possible(resource_size_t base, unsigned long size)
+static int is_io_mapping_possible(resource_size_t base, unsigned long size)
 {
 #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
 	/* There is no way to map greater than 1 << 32 address without PAE */
@@ -31,7 +31,30 @@ int is_io_mapping_possible(resource_size
 #endif
 	return 1;
 }
-EXPORT_SYMBOL_GPL(is_io_mapping_possible);
+
+int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
+{
+	unsigned long flag = _PAGE_CACHE_WC;
+	int ret;
+
+	if (!is_io_mapping_possible(base, size))
+		return -EINVAL;
+
+	ret = io_reserve_memtype(base, base + size, &flag);
+	if (ret)
+		return ret;
+
+	*prot = __pgprot(__PAGE_KERNEL | flag);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_create_wc);
+
+void
+iomap_free(resource_size_t base, unsigned long size)
+{
+	io_free_memtype(base, base + size);
+}
+EXPORT_SYMBOL_GPL(iomap_free);
 
 void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
 {
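
iomap_create_wc() and iomap_free() become the backends of the generic
io_mapping API. A hedged usage sketch of the driver-side pattern from
include/linux/io-mapping.h (simplified; base, size and offset are assumed
to describe a write-combinable PCI BAR):

	struct io_mapping *map;

	map = io_mapping_create_wc(base, size);	/* -> iomap_create_wc() */
	if (map) {
		void *p = io_mapping_map_atomic_wc(map, offset);
		/* ... write-combined access to the mapped window ... */
		io_mapping_unmap_atomic(p);
		io_mapping_free(map);		/* -> iomap_free() */
	}
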
--- sle11sp1-2010-03-29.orig/arch/x86/mm/ioremap-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/ioremap-xen.c	2009-11-18 14:56:06.000000000 +0100
@@ -23,81 +23,7 @@
 #include <asm/pgalloc.h>
 #include <asm/pat.h>
 
-static inline int phys_addr_valid(resource_size_t addr)
-{
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-	return !(addr >> boot_cpu_data.x86_phys_bits);
-#else
-	return 1;
-#endif
-}
-
-#ifdef CONFIG_X86_64
-
-#define phys_base 0
-
-unsigned long __phys_addr(unsigned long x)
-{
-	if (x >= __START_KERNEL_map) {
-		x -= __START_KERNEL_map;
-		VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
-		x += phys_base;
-	} else {
-		VIRTUAL_BUG_ON(x < PAGE_OFFSET);
-		x -= PAGE_OFFSET;
-		VIRTUAL_BUG_ON(!phys_addr_valid(x));
-	}
-	return x;
-}
-EXPORT_SYMBOL(__phys_addr);
-
-bool __virt_addr_valid(unsigned long x)
-{
-	if (x >= __START_KERNEL_map) {
-		x -= __START_KERNEL_map;
-		if (x >= KERNEL_IMAGE_SIZE)
-			return false;
-		x += phys_base;
-	} else {
-		if (x < PAGE_OFFSET)
-			return false;
-		x -= PAGE_OFFSET;
-		if (!phys_addr_valid(x))
-			return false;
-	}
-
-	return pfn_valid(x >> PAGE_SHIFT);
-}
-EXPORT_SYMBOL(__virt_addr_valid);
-
-#undef phys_base
-
-#else
-
-#ifdef CONFIG_DEBUG_VIRTUAL
-unsigned long __phys_addr(unsigned long x)
-{
-	/* VMALLOC_* aren't constants  */
-	VIRTUAL_BUG_ON(x < PAGE_OFFSET);
-	VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
-	return x - PAGE_OFFSET;
-}
-EXPORT_SYMBOL(__phys_addr);
-#endif
-
-bool __virt_addr_valid(unsigned long x)
-{
-	if (x < PAGE_OFFSET)
-		return false;
-	if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
-		return false;
-	if (x >= FIXADDR_START)
-		return false;
-	return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
-}
-EXPORT_SYMBOL(__virt_addr_valid);
-
-#endif
+#include "physaddr.h"
 
 static int direct_remap_area_pte_fn(pte_t *pte,
 				    struct page *pmd_page,
@@ -407,30 +333,19 @@ static void __iomem *__ioremap_caller(re
 	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
 						prot_val, &new_prot_val);
 	if (retval) {
-		pr_debug("Warning: reserve_memtype returned %d\n", retval);
+		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
 		return NULL;
 	}
 
 	if (prot_val != new_prot_val) {
-		/*
-		 * Do not fallback to certain memory types with certain
-		 * requested type:
-		 * - request is uc-, return cannot be write-back
-		 * - request is uc-, return cannot be write-combine
-		 * - request is write-combine, return cannot be write-back
-		 */
-		if ((prot_val == _PAGE_CACHE_UC_MINUS &&
-		     (new_prot_val == _PAGE_CACHE_WB ||
-		      new_prot_val == _PAGE_CACHE_WC)) ||
-		    (prot_val == _PAGE_CACHE_WC &&
-		     new_prot_val == _PAGE_CACHE_WB)) {
-			pr_debug(
+		if (!is_new_memtype_allowed(phys_addr, size,
+					    prot_val, new_prot_val)) {
+			printk(KERN_ERR
 		"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
 				(unsigned long long)phys_addr,
 				(unsigned long long)(phys_addr + size),
 				prot_val, new_prot_val);
-			free_memtype(phys_addr, phys_addr + size);
-			return NULL;
+			goto err_free_memtype;
 		}
 		prot_val = new_prot_val;
 	}
@@ -456,27 +371,26 @@ static void __iomem *__ioremap_caller(re
 	 */
 	area = get_vm_area_caller(size, VM_IOREMAP, caller);
 	if (!area)
-		return NULL;
+		goto err_free_memtype;
 	area->phys_addr = phys_addr;
 	vaddr = (unsigned long) area->addr;
 
-	if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
-		free_memtype(phys_addr, phys_addr + size);
-		free_vm_area(area);
-		return NULL;
-	}
+	if (kernel_map_sync_memtype(phys_addr, size, prot_val))
+		goto err_free_area;
 
 	if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
-				     size, prot, domid)) {
-		free_memtype(phys_addr, phys_addr + size);
-		free_vm_area(area);
-		return NULL;
-	}
+				     size, prot, domid))
+		goto err_free_area;
 
 	ret_addr = (void __iomem *) (vaddr + offset);
 	mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
 
 	return ret_addr;
+err_free_area:
+	free_vm_area(area);
+err_free_memtype:
+	free_memtype(phys_addr, phys_addr + size);
+	return NULL;
 }
 
 /**
--- sle11sp1-2010-03-29.orig/arch/x86/mm/pageattr-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/pageattr-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -12,6 +12,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/pfn.h>
+#include <linux/percpu.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -143,6 +144,7 @@ void clflush_cache_range(void *vaddr, un
 
 	mb();
 }
+EXPORT_SYMBOL_GPL(clflush_cache_range);
 
 static void __cpa_flush_all(void *arg)
 {
@@ -707,7 +709,7 @@ static int cpa_process_alias(struct cpa_
 {
 	struct cpa_data alias_cpa;
 	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
-	unsigned long vaddr, remapped;
+	unsigned long vaddr;
 	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
@@ -765,24 +767,6 @@ static int cpa_process_alias(struct cpa_
 	}
 #endif
 
-	/*
-	 * If the PMD page was partially used for per-cpu remapping,
-	 * the recycled area needs to be split and modified.  Because
-	 * the area is always proper subset of a PMD page
-	 * cpa->numpages is guaranteed to be 1 for these areas, so
-	 * there's no need to loop over and check for further remaps.
-	 */
-	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
-	if (remapped) {
-		WARN_ON(cpa->numpages > 1);
-		alias_cpa = *cpa;
-		alias_cpa.vaddr = &remapped;
-		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-		ret = __change_page_attr_set_clr(&alias_cpa, 0);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
@@ -843,6 +827,7 @@ static int change_page_attr_set_clr(unsi
 {
 	struct cpa_data cpa;
 	int ret, cache, checkalias;
+	unsigned long baddr = 0;
 
 	/*
 	 * Check, if we are requested to change a not supported
@@ -874,6 +859,11 @@ static int change_page_attr_set_clr(unsi
 			 */
 			WARN_ON_ONCE(1);
 		}
+		/*
+		 * Save address for cache flush. *addr is modified in the call
+		 * to __change_page_attr_set_clr() below.
+		 */
+		baddr = *addr;
 	}
 
 	/* Must avoid aliasing mappings in the highmem code */
@@ -921,7 +911,7 @@ static int change_page_attr_set_clr(unsi
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
 		} else
-			cpa_flush_range(*addr, numpages, cache);
+			cpa_flush_range(baddr, numpages, cache);
 	} else
 		cpa_flush_all(cache);
 
--- sle11sp1-2010-03-29.orig/arch/x86/mm/pat-xen.c	2010-02-05 11:17:21.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/pat-xen.c	2010-02-05 11:22:27.000000000 +0100
@@ -15,6 +15,7 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/rbtree.h>
 
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
@@ -80,6 +81,7 @@ enum {
 void pat_init(void)
 {
 	u64 pat;
+	bool boot_cpu = !boot_pat_state;
 
 	if (!pat_enabled)
 		return;
@@ -131,8 +133,10 @@ void pat_init(void)
 	if (!boot_pat_state)
 		boot_pat_state = pat;
 #endif
-	printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
-	       smp_processor_id(), boot_pat_state, pat);
+
+	if (boot_cpu)
+		printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
+		       smp_processor_id(), boot_pat_state, pat);
 }
 
 #undef PAT
@@ -160,11 +164,10 @@ static char *cattr_name(unsigned long fl
  * areas). All the aliases have the same cache attributes of course.
  * Zero attributes are represented as holes.
  *
- * Currently the data structure is a list because the number of mappings
- * are expected to be relatively small. If this should be a problem
- * it could be changed to a rbtree or similar.
+ * The data structure is a list that is also organized as an rbtree
+ * sorted on the start address of memtype range.
  *
- * memtype_lock protects the whole list.
+ * memtype_lock protects both the linear list and rbtree.
  */
 
 struct memtype {
@@ -172,11 +175,53 @@ struct memtype {
 	u64			end;
 	unsigned long		type;
 	struct list_head	nd;
+	struct rb_node		rb;
 };
 
+static struct rb_root memtype_rbroot = RB_ROOT;
 static LIST_HEAD(memtype_list);
 static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype list */
 
+static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
+{
+	struct rb_node *node = root->rb_node;
+	struct memtype *last_lower = NULL;
+
+	while (node) {
+		struct memtype *data = container_of(node, struct memtype, rb);
+
+		if (data->start < start) {
+			last_lower = data;
+			node = node->rb_right;
+		} else if (data->start > start) {
+			node = node->rb_left;
+		} else
+			return data;
+	}
+
+	/* Will return NULL if there is no entry with its start <= start */
+	return last_lower;
+}
+
+static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
+{
+	struct rb_node **new = &(root->rb_node);
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct memtype *this = container_of(*new, struct memtype, rb);
+
+		parent = *new;
+		if (data->start <= this->start)
+			new = &((*new)->rb_left);
+		else if (data->start > this->start)
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&data->rb, parent, new);
+	rb_insert_color(&data->rb, root);
+}
+
 static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end);
 static inline u8 _mtrr_type_lookup(u64 start, u64 end)
 {
@@ -240,9 +285,6 @@ chk_conflict(struct memtype *new, struct
 	return -EBUSY;
 }
 
-static struct memtype *cached_entry;
-static u64 cached_start;
-
 static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
 {
 	int ram_page = 0, not_rampage = 0;
@@ -271,69 +313,65 @@ static int pat_pagerange_is_ram(resource
 }
 
 /*
- * For RAM pages, mark the pages as non WB memory type using
- * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
- * set_memory_wc() on a RAM page at a time before marking it as WB again.
- * This is ok, because only one driver will be owning the page and
- * doing set_memory_*() calls.
- *
- * For now, we use PageNonWB to track that the RAM page is being mapped
- * as non WB. In future, we will have to use one more flag
- * (or some other mechanism in page_struct) to distinguish between
- * UC and WC mapping.
+ * For RAM pages, we use page flags to mark the pages with appropriate type.
+ * Here we make two passes:
+ * - Find the memtype of all the pages in the range, look for any conflicts
+ * - In case of no conflicts, set the new memtype for pages in the range
+ *
+ * Caller must hold memtype_lock for atomicity.
  */
 static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
 				  unsigned long *new_type)
 {
 	struct page *page;
-	unsigned long mfn, end_mfn;
+	unsigned long mfn;
+
+	if (req_type == _PAGE_CACHE_UC) {
+		/* We do not support strong UC */
+		WARN_ON_ONCE(1);
+		req_type = _PAGE_CACHE_UC_MINUS;
+	}
 
 	for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
-		unsigned long pfn = mfn_to_local_pfn(mfn);
+		unsigned long type, pfn = mfn_to_local_pfn(mfn);
 
 		BUG_ON(!pfn_valid(pfn));
 		page = pfn_to_page(pfn);
-		if (page_mapped(page) || PageNonWB(page))
-			goto out;
+		type = get_page_memtype(page);
+		if (type != -1) {
+			printk(KERN_INFO "reserve_ram_pages_type failed "
+				"0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
+				start, end, type, req_type);
+			if (new_type)
+				*new_type = type;
 
-		SetPageNonWB(page);
+			return -EBUSY;
+		}
 	}
-	return 0;
 
-out:
-	end_mfn = mfn;
-	for (mfn = (start >> PAGE_SHIFT); mfn < end_mfn; ++mfn) {
+	if (new_type)
+		*new_type = req_type;
+
+	for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
 		page = pfn_to_page(mfn_to_local_pfn(mfn));
-		ClearPageNonWB(page);
+		set_page_memtype(page, req_type);
 	}
-
-	return -EINVAL;
+	return 0;
 }
 
 static int free_ram_pages_type(u64 start, u64 end)
 {
 	struct page *page;
-	unsigned long mfn, end_mfn;
+	unsigned long mfn;
 
 	for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
 		unsigned long pfn = mfn_to_local_pfn(mfn);
 
 		BUG_ON(!pfn_valid(pfn));
 		page = pfn_to_page(pfn);
-		if (page_mapped(page) || !PageNonWB(page))
-			goto out;
-
-		ClearPageNonWB(page);
+		set_page_memtype(page, -1);
 	}
 	return 0;
-
-out:
-	end_mfn = mfn;
-	for (mfn = (start >> PAGE_SHIFT); mfn < end_mfn; ++mfn) {
-		page = pfn_to_page(mfn_to_local_pfn(mfn));
-		SetPageNonWB(page);
-	}
-	return -EINVAL;
 }
 
 /*
@@ -367,6 +405,8 @@ int reserve_memtype(u64 start, u64 end, 
 		if (new_type) {
 			if (req_type == -1)
 				*new_type = _PAGE_CACHE_WB;
+			else if (req_type == _PAGE_CACHE_WC)
+				*new_type = _PAGE_CACHE_UC_MINUS;
 			else
 				*new_type = req_type & _PAGE_CACHE_MASK;
 		}
@@ -392,11 +432,16 @@ int reserve_memtype(u64 start, u64 end, 
 		*new_type = actual_type;
 
 	is_range_ram = pat_pagerange_is_ram(start, end);
-	if (is_range_ram == 1)
-		return reserve_ram_pages_type(start, end, req_type,
-					      new_type);
-	else if (is_range_ram < 0)
+	if (is_range_ram == 1) {
+
+		spin_lock(&memtype_lock);
+		err = reserve_ram_pages_type(start, end, req_type, new_type);
+		spin_unlock(&memtype_lock);
+
+		return err;
+	} else if (is_range_ram < 0) {
 		return -EINVAL;
+	}
 
 	new  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!new)
@@ -408,17 +453,11 @@ int reserve_memtype(u64 start, u64 end, 
 
 	spin_lock(&memtype_lock);
 
-	if (cached_entry && start >= cached_start)
-		entry = cached_entry;
-	else
-		entry = list_entry(&memtype_list, struct memtype, nd);
-
 	/* Search for existing mapping that overlaps the current range */
 	where = NULL;
-	list_for_each_entry_continue(entry, &memtype_list, nd) {
+	list_for_each_entry(entry, &memtype_list, nd) {
 		if (end <= entry->start) {
 			where = entry->nd.prev;
-			cached_entry = list_entry(where, struct memtype, nd);
 			break;
 		} else if (start <= entry->start) { /* end > entry->start */
 			err = chk_conflict(new, entry, new_type);
@@ -426,8 +465,6 @@ int reserve_memtype(u64 start, u64 end, 
 				dprintk("Overlap at 0x%Lx-0x%Lx\n",
 					entry->start, entry->end);
 				where = entry->nd.prev;
-				cached_entry = list_entry(where,
-							struct memtype, nd);
 			}
 			break;
 		} else if (start < entry->end) { /* start > entry->start */
@@ -435,8 +472,6 @@ int reserve_memtype(u64 start, u64 end, 
 			if (!err) {
 				dprintk("Overlap at 0x%Lx-0x%Lx\n",
 					entry->start, entry->end);
-				cached_entry = list_entry(entry->nd.prev,
-							struct memtype, nd);
 
 				/*
 				 * Move to right position in the linked
@@ -464,13 +499,13 @@ int reserve_memtype(u64 start, u64 end, 
 		return err;
 	}
 
-	cached_start = start;
-
 	if (where)
 		list_add(&new->nd, where);
 	else
 		list_add_tail(&new->nd, &memtype_list);
 
+	memtype_rb_insert(&memtype_rbroot, new);
+
 	spin_unlock(&memtype_lock);
 
 	dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
@@ -482,7 +517,7 @@ int reserve_memtype(u64 start, u64 end, 
 
 int free_memtype(u64 start, u64 end)
 {
-	struct memtype *entry;
+	struct memtype *entry, *saved_entry;
 	int err = -EINVAL;
 	int is_range_ram;
 
@@ -494,23 +529,58 @@ int free_memtype(u64 start, u64 end)
 		return 0;
 
 	is_range_ram = pat_pagerange_is_ram(start, end);
-	if (is_range_ram == 1)
-		return free_ram_pages_type(start, end);
-	else if (is_range_ram < 0)
+	if (is_range_ram == 1) {
+
+		spin_lock(&memtype_lock);
+		err = free_ram_pages_type(start, end);
+		spin_unlock(&memtype_lock);
+
+		return err;
+	} else if (is_range_ram < 0) {
 		return -EINVAL;
+	}
 
 	spin_lock(&memtype_lock);
-	list_for_each_entry(entry, &memtype_list, nd) {
+
+	entry = memtype_rb_search(&memtype_rbroot, start);
+	if (unlikely(entry == NULL))
+		goto unlock_ret;
+
+	/*
+	 * The saved entry points to an entry whose start is the same as or
+	 * less than the one we searched for. The list is kept sorted by
+	 * start address, so walk it in both directions to find the entry
+	 * that matches both start and end.
+	 */
+	saved_entry = entry;
+	list_for_each_entry_from(entry, &memtype_list, nd) {
 		if (entry->start == start && entry->end == end) {
-			if (cached_entry == entry || cached_start == start)
-				cached_entry = NULL;
+			rb_erase(&entry->rb, &memtype_rbroot);
+			list_del(&entry->nd);
+			kfree(entry);
+			err = 0;
+			break;
+		} else if (entry->start > start) {
+			break;
+		}
+	}
+
+	if (!err)
+		goto unlock_ret;
 
+	entry = saved_entry;
+	list_for_each_entry_reverse(entry, &memtype_list, nd) {
+		if (entry->start == start && entry->end == end) {
+			rb_erase(&entry->rb, &memtype_rbroot);
 			list_del(&entry->nd);
 			kfree(entry);
 			err = 0;
 			break;
+		} else if (entry->start < start) {
+			break;
 		}
 	}
+unlock_ret:
 	spin_unlock(&memtype_lock);
 
 	if (err) {
@@ -524,6 +594,103 @@ int free_memtype(u64 start, u64 end)
 }
 
 
+#ifndef CONFIG_XEN
+/**
+ * lookup_memtype - Looks up the memory type for a physical address
+ * @paddr: physical address of which memory type needs to be looked up
+ *
+ * Only to be called when PAT is enabled
+ *
+ * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
+ * _PAGE_CACHE_UC
+ */
+static unsigned long lookup_memtype(u64 paddr)
+{
+	int rettype = _PAGE_CACHE_WB;
+	struct memtype *entry;
+
+	if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
+		return rettype;
+
+	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
+		struct page *page;
+		spin_lock(&memtype_lock);
+		page = pfn_to_page(paddr >> PAGE_SHIFT);
+		rettype = get_page_memtype(page);
+		spin_unlock(&memtype_lock);
+		/*
+		 * -1 from get_page_memtype() implies RAM page is in its
+		 * default state and not reserved, and hence of type WB
+		 */
+		if (rettype == -1)
+			rettype = _PAGE_CACHE_WB;
+
+		return rettype;
+	}
+
+	spin_lock(&memtype_lock);
+
+	entry = memtype_rb_search(&memtype_rbroot, paddr);
+	if (entry != NULL)
+		rettype = entry->type;
+	else
+		rettype = _PAGE_CACHE_UC_MINUS;
+
+	spin_unlock(&memtype_lock);
+	return rettype;
+}
+#endif
+
+/**
+ * io_reserve_memtype - Request a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ * @type: a pointer to the requested memtype; on success it is updated to
+ * the requested type, or to any other compatible type available for the region
+ *
+ * On success, returns 0
+ * On failure, returns non-zero
+ */
+int io_reserve_memtype(resource_size_t start, resource_size_t end,
+			unsigned long *type)
+{
+	resource_size_t size = end - start;
+	unsigned long req_type = *type;
+	unsigned long new_type;
+	int ret;
+
+	WARN_ON_ONCE(iomem_map_sanity_check(start, size));
+
+	ret = reserve_memtype(start, end, req_type, &new_type);
+	if (ret)
+		goto out_err;
+
+	if (!is_new_memtype_allowed(start, size, req_type, new_type))
+		goto out_free;
+
+	if (kernel_map_sync_memtype(start, size, new_type) < 0)
+		goto out_free;
+
+	*type = new_type;
+	return 0;
+
+out_free:
+	free_memtype(start, end);
+	ret = -EBUSY;
+out_err:
+	return ret;
+}
+
+/**
+ * io_free_memtype - Release a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ */
+void io_free_memtype(resource_size_t start, resource_size_t end)
+{
+	free_memtype(start, end);
+}
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
 				unsigned long size, pgprot_t vma_prot)
 {
@@ -605,9 +772,6 @@ int phys_mem_access_prot_allowed(struct 
  */
 int kernel_map_sync_memtype(u64 ma, unsigned long size, unsigned long flags)
 {
-	if (!pat_enabled)
-		return 0;
-
 	return ioremap_check_change_attr(ma >> PAGE_SHIFT, size, flags);
 }
 
@@ -628,11 +792,29 @@ static int reserve_pfn_range(u64 paddr, 
 	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 
 	/*
-	 * reserve_pfn_range() doesn't support RAM pages. Maintain the current
-	 * behavior with RAM pages by returning success.
+	 * reserve_pfn_range() for RAM pages. We do not refcount to keep
+	 * track of number of mappings of RAM pages. We can assert that
+	 * the type requested matches the type of first page in the range.
 	 */
-	if (is_ram != 0)
+	if (is_ram) {
+		if (!pat_enabled)
+			return 0;
+
+		flags = lookup_memtype(paddr);
+		if (want_flags != flags) {
+			printk(KERN_WARNING
+			"%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
+				current->comm, current->pid,
+				cattr_name(want_flags),
+				(unsigned long long)paddr,
+				(unsigned long long)(paddr + size),
+				cattr_name(flags));
+			*vma_prot = __pgprot((pgprot_val(*vma_prot) &
+					      (~_PAGE_CACHE_MASK)) |
+					     flags);
+		}
 		return 0;
+	}
 
 	ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
 	if (ret)
@@ -694,14 +876,6 @@ int track_pfn_vma_copy(struct vm_area_st
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 	pgprot_t pgprot;
 
-	if (!pat_enabled)
-		return 0;
-
-	/*
-	 * For now, only handle remap_pfn_range() vmas where
-	 * is_linear_pfn_mapping() == TRUE. Handling of
-	 * vm_insert_pfn() is TBD.
-	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/*
 		 * reserve the whole chunk covered by vma. We need the
@@ -729,23 +903,24 @@ int track_pfn_vma_copy(struct vm_area_st
 int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 			unsigned long pfn, unsigned long size)
 {
+	unsigned long flags;
 	resource_size_t paddr;
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
-	if (!pat_enabled)
-		return 0;
-
-	/*
-	 * For now, only handle remap_pfn_range() vmas where
-	 * is_linear_pfn_mapping() == TRUE. Handling of
-	 * vm_insert_pfn() is TBD.
-	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/* reserve the whole chunk starting from vm_pgoff */
 		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
 		return reserve_pfn_range(paddr, vma_size, prot, 0);
 	}
 
+	if (!pat_enabled)
+		return 0;
+
+	/* for vm_insert_pfn and friends, we set prot based on lookup */
+	flags = lookup_memtype(pfn << PAGE_SHIFT);
+	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+			 flags);
+
 	return 0;
 }
 
@@ -760,14 +935,6 @@ void untrack_pfn_vma(struct vm_area_stru
 	resource_size_t paddr;
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
-	if (!pat_enabled)
-		return;
-
-	/*
-	 * For now, only handle remap_pfn_range() vmas where
-	 * is_linear_pfn_mapping() == TRUE. Handling of
-	 * vm_insert_pfn() is TBD.
-	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/* free the whole chunk starting from vm_pgoff */
 		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
@@ -844,7 +1011,7 @@ static int memtype_seq_show(struct seq_f
 	return 0;
 }
 
-static struct seq_operations memtype_seq_ops = {
+static const struct seq_operations memtype_seq_ops = {
 	.start = memtype_seq_start,
 	.next  = memtype_seq_next,
 	.stop  = memtype_seq_stop,
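
memtype_rb_search() returns either the exact entry or the last entry whose
start lies below the key, which is what lets free_memtype() locate a
candidate in O(log n) and then walk the sorted list only locally. A small
worked sketch of that contract (hypothetical ranges, for illustration only):

	/* tree holds entries whose starts are 0x1000, 0x3000 and 0x5000 */
	entry = memtype_rb_search(&memtype_rbroot, 0x3000); /* exact match   */
	entry = memtype_rb_search(&memtype_rbroot, 0x4000); /* entry 0x3000  */
	entry = memtype_rb_search(&memtype_rbroot, 0x0800); /* NULL: nothing
							       starts <= key */
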
--- sle11sp1-2010-03-29.orig/arch/x86/mm/pgtable-xen.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/arch/x86/mm/pgtable-xen.c	2009-11-06 10:52:23.000000000 +0100
@@ -692,8 +692,7 @@ int ptep_set_access_flags(struct vm_area
 		if (likely(vma->vm_mm == current->mm)) {
 			if (HYPERVISOR_update_va_mapping(address,
 				entry,
-				uvm_multi(vma->vm_mm->cpu_vm_mask) |
-					UVMF_INVLPG))
+				uvm_multi(mm_cpumask(vma->vm_mm))|UVMF_INVLPG))
 				BUG();
 		} else {
 			xen_l1_entry_update(ptep, entry);
--- sle11sp1-2010-03-29.orig/arch/x86/mm/physaddr.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/arch/x86/mm/physaddr.c	2009-11-06 10:52:23.000000000 +0100
@@ -8,6 +8,10 @@
 
 #ifdef CONFIG_X86_64
 
+#ifdef CONFIG_XEN
+#define phys_base 0
+#endif
+
 unsigned long __phys_addr(unsigned long x)
 {
 	if (x >= __START_KERNEL_map) {
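
With phys_base forced to 0 for Xen, the x86-64 __phys_addr() arithmetic
degenerates to plain offsets, since a paravirtualized kernel is mapped
pseudo-physically contiguous from 0. Expressed as equivalent arithmetic
(not a literal excerpt):

	pa = va - __START_KERNEL_map;	/* kernel image mapping */
	pa = va - PAGE_OFFSET;		/* direct (linear) mapping */
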
--- sle11sp1-2010-03-29.orig/drivers/acpi/processor_core.c	2010-01-04 12:41:22.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/acpi/processor_core.c	2010-01-04 12:42:24.000000000 +0100
@@ -880,7 +880,7 @@ static int __cpuinit acpi_processor_add(
 
 	result = processor_extcntl_prepare(pr);
 	if (result)
-		goto end;
+		goto err_power_exit;
 
 	pr->cdev = thermal_cooling_device_register("Processor", device,
 						&processor_cooling_ops);
--- sle11sp1-2010-03-29.orig/drivers/char/agp/agp.h	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/char/agp/agp.h	2009-11-06 10:52:23.000000000 +0100
@@ -31,6 +31,10 @@
 
 #include <asm/agp.h>	/* for flush_agp_cache() */
 
+#ifndef virt_to_gart
+#define virt_to_gart virt_to_phys
+#endif
+
 #define PFX "agpgart: "
 
 //#define AGP_DEBUG 1
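
The virt_to_gart fallback keeps the bare-metal behaviour (bus address ==
physical address) while letting a Xen <asm/agp.h> supply its own
definition, since a GART is programmed with host machine addresses, not
guest pseudo-physical ones. A hypothetical sketch of such an override
(illustrative only; the real Xen definition lives elsewhere in this patch
set):

	/* assumed Xen-side definition: translate a kernel virtual address
	 * to a machine (host-physical) address for the GATT */
	#define virt_to_gart(v)	phys_to_machine(__pa(v))
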
--- sle11sp1-2010-03-29.orig/drivers/char/agp/amd-k7-agp.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/char/agp/amd-k7-agp.c	2009-11-06 10:52:23.000000000 +0100
@@ -44,7 +44,7 @@ static int amd_create_page_map(struct am
 #ifndef CONFIG_X86
 	SetPageReserved(virt_to_page(page_map->real));
 	global_cache_flush();
-	page_map->remapped = ioremap_nocache(virt_to_phys(page_map->real),
+	page_map->remapped = ioremap_nocache(virt_to_gart(page_map->real),
 					    PAGE_SIZE);
 	if (page_map->remapped == NULL) {
 		ClearPageReserved(virt_to_page(page_map->real));
@@ -160,7 +160,7 @@ static int amd_create_gatt_table(struct 
 
 	agp_bridge->gatt_table_real = (u32 *)page_dir.real;
 	agp_bridge->gatt_table = (u32 __iomem *)page_dir.remapped;
-	agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
+	agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
 
 	/* Get the address for the gart region.
 	 * This is a bus address even on the alpha, b/c its
@@ -173,7 +173,7 @@ static int amd_create_gatt_table(struct 
 
 	/* Calculate the agp offset */
 	for (i = 0; i < value->num_entries / 1024; i++, addr += 0x00400000) {
-		writel(virt_to_phys(amd_irongate_private.gatt_pages[i]->real) | 1,
+		writel(virt_to_gart(amd_irongate_private.gatt_pages[i]->real) | 1,
 			page_dir.remapped+GET_PAGE_DIR_OFF(addr));
 		readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr));	/* PCI Posting. */
 	}
--- sle11sp1-2010-03-29.orig/drivers/char/agp/amd64-agp.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/char/agp/amd64-agp.c	2009-11-06 10:52:23.000000000 +0100
@@ -178,7 +178,7 @@ static const struct aper_size_info_32 am
 
 static int amd_8151_configure(void)
 {
-	unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
+	unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
 	int i;
 
 	/* Configure AGP regs in each x86-64 host bridge. */
@@ -558,7 +558,7 @@ static void __devexit agp_amd64_remove(s
 {
 	struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
 
-	release_mem_region(virt_to_phys(bridge->gatt_table_real),
+	release_mem_region(virt_to_gart(bridge->gatt_table_real),
 			   amd64_aperture_sizes[bridge->aperture_size_idx].size);
 	agp_remove_bridge(bridge);
 	agp_put_bridge(bridge);
--- sle11sp1-2010-03-29.orig/drivers/char/agp/ati-agp.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/char/agp/ati-agp.c	2009-11-06 10:52:23.000000000 +0100
@@ -360,7 +360,7 @@ static int ati_create_gatt_table(struct 
 
 	agp_bridge->gatt_table_real = (u32 *)page_dir.real;
 	agp_bridge->gatt_table = (u32 __iomem *) page_dir.remapped;
-	agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
+	agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
 
 	/* Write out the size register */
 	current_size = A_SIZE_LVL2(agp_bridge->current_size);
@@ -390,7 +390,7 @@ static int ati_create_gatt_table(struct 
 
 	/* Calculate the agp offset */
 	for (i = 0; i < value->num_entries / 1024; i++, addr += 0x00400000) {
-		writel(virt_to_phys(ati_generic_private.gatt_pages[i]->real) | 1,
+		writel(virt_to_gart(ati_generic_private.gatt_pages[i]->real) | 1,
 			page_dir.remapped+GET_PAGE_DIR_OFF(addr));
 		readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr));	/* PCI Posting. */
 	}
--- sle11sp1-2010-03-29.orig/drivers/char/agp/efficeon-agp.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/char/agp/efficeon-agp.c	2009-11-06 10:52:23.000000000 +0100
@@ -226,7 +226,7 @@ static int efficeon_create_gatt_table(st
 
 		efficeon_private.l1_table[index] = page;
 
-		value = virt_to_phys((unsigned long *)page) | pati | present | index;
+		value = virt_to_gart((unsigned long *)page) | pati | present | index;
 
 		pci_write_config_dword(agp_bridge->dev,
 			EFFICEON_ATTPAGE, value);
--- sle11sp1-2010-03-29.orig/drivers/char/agp/generic.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/char/agp/generic.c	2009-11-06 10:52:23.000000000 +0100
@@ -988,7 +988,7 @@ int agp_generic_create_gatt_table(struct
 	set_memory_uc((unsigned long)table, 1 << page_order);
 	bridge->gatt_table = (void *)table;
 #else
-	bridge->gatt_table = ioremap_nocache(virt_to_phys(table),
+	bridge->gatt_table = ioremap_nocache(virt_to_gart(table),
 					(PAGE_SIZE * (1 << page_order)));
 	bridge->driver->cache_flush();
 #endif
@@ -1001,7 +1001,7 @@ int agp_generic_create_gatt_table(struct
 
 		return -ENOMEM;
 	}
-	bridge->gatt_bus_addr = virt_to_phys(bridge->gatt_table_real);
+	bridge->gatt_bus_addr = virt_to_gart(bridge->gatt_table_real);
 
 	/* AK: bogus, should encode addresses > 4GB */
 	for (i = 0; i < num_entries; i++) {
--- sle11sp1-2010-03-29.orig/drivers/char/agp/sworks-agp.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/char/agp/sworks-agp.c	2009-11-06 10:52:23.000000000 +0100
@@ -155,7 +155,7 @@ static int serverworks_create_gatt_table
 	/* Create a fake scratch directory */
 	for (i = 0; i < 1024; i++) {
 		writel(agp_bridge->scratch_page, serverworks_private.scratch_dir.remapped+i);
-		writel(virt_to_phys(serverworks_private.scratch_dir.real) | 1, page_dir.remapped+i);
+		writel(virt_to_gart(serverworks_private.scratch_dir.real) | 1, page_dir.remapped+i);
 	}
 
 	retval = serverworks_create_gatt_pages(value->num_entries / 1024);
@@ -167,7 +167,7 @@ static int serverworks_create_gatt_table
 
 	agp_bridge->gatt_table_real = (u32 *)page_dir.real;
 	agp_bridge->gatt_table = (u32 __iomem *)page_dir.remapped;
-	agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
+	agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
 
 	/* Get the address for the gart region.
 	 * This is a bus address even on the alpha, b/c its
@@ -179,7 +179,7 @@ static int serverworks_create_gatt_table
 
 	/* Calculate the agp offset */
 	for (i = 0; i < value->num_entries / 1024; i++)
-		writel(virt_to_phys(serverworks_private.gatt_pages[i]->real)|1, page_dir.remapped+i);
+		writel(virt_to_gart(serverworks_private.gatt_pages[i]->real)|1, page_dir.remapped+i);
 
 	return 0;
 }
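
The AGP hunks above all make the same substitution: a GATT or page-directory entry must hold an address the GART hardware can actually use, which under Xen is a machine address rather than the pseudo-physical address that virt_to_phys() yields. As a sketch of the idea only (the AGP layer traditionally wrapped this in drivers/char/agp/agp.h; the exact Xen definition in this tree may differ):

/*
 * Sketch: virt_to_gart() as AGP historically defined it.
 * phys_to_gart() is the identity on bare metal; a Xen port must
 * translate the pseudo-physical frame to the machine frame here.
 */
#define phys_to_gart(x)	(x)	/* native case */
#define virt_to_gart(x)	(phys_to_gart(virt_to_phys(x)))
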
--- sle11sp1-2010-03-29.orig/drivers/net/Kconfig	2009-11-06 10:51:07.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/net/Kconfig	2009-11-06 11:36:52.000000000 +0100
@@ -3235,7 +3235,7 @@ config VIRTIO_NET
 
 config VMXNET3
        tristate "VMware VMXNET3 ethernet driver"
-       depends on PCI && X86 && INET
+       depends on PCI && X86 && !XEN && INET
        help
          This driver supports VMware's vmxnet3 virtual ethernet NIC.
          To compile this driver as a module, choose M here: the
--- sle11sp1-2010-03-29.orig/drivers/pci/msi-xen.c	2009-12-04 11:30:30.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/pci/msi-xen.c	2009-12-04 11:32:09.000000000 +0100
@@ -16,12 +16,11 @@
 #include <linux/proc_fs.h>
 #include <linux/msi.h>
 #include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/io.h>
 
 #include <xen/evtchn.h>
 
-#include <asm/errno.h>
-#include <asm/io.h>
-
 #include "pci.h"
 #include "msi.h"
 
@@ -479,7 +478,7 @@ static int msix_capability_init(struct p
  * to determine if MSI/-X are supported for the device. If MSI/-X is
  * supported return 0, else return an error code.
  **/
-static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
+static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
 {
 	struct pci_bus *bus;
 	int ret;
@@ -496,8 +495,9 @@ static int pci_msi_check_device(struct p
 	if (nvec < 1)
 		return -ERANGE;
 
-	/* Any bridge which does NOT route MSI transactions from it's
-	 * secondary bus to it's primary bus must set NO_MSI flag on
+	/*
+	 * Any bridge which does NOT route MSI transactions from its
+	 * secondary bus to its primary bus must set NO_MSI flag on
 	 * the secondary pci_bus.
 	 * We expect only arch-specific PCI host bus controller driver
 	 * or quirks for specific PCI bridges to be setting NO_MSI.
@@ -615,7 +615,7 @@ void pci_msi_shutdown(struct pci_dev *de
 	dev->msi_enabled = 0;
 }
 
-void pci_disable_msi(struct pci_dev* dev)
+void pci_disable_msi(struct pci_dev *dev)
 {
 	pci_msi_shutdown(dev);
 }
@@ -655,14 +655,14 @@ int pci_msix_table_size(struct pci_dev *
  **/
 extern int pci_frontend_enable_msix(struct pci_dev *dev,
 		struct msix_entry *entries, int nvec);
-int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
 {
 	int status, nr_entries;
 	int i, j, temp;
 	struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
 
 	if (!entries)
- 		return -EINVAL;
+		return -EINVAL;
 
 #ifdef CONFIG_XEN_PCIDEV_FRONTEND
 	if (!is_initial_xendomain()) {
@@ -737,7 +737,7 @@ int pci_enable_msix(struct pci_dev* dev,
 EXPORT_SYMBOL(pci_enable_msix);
 
 extern void pci_frontend_disable_msix(struct pci_dev* dev);
-void pci_msix_shutdown(struct pci_dev* dev)
+void pci_msix_shutdown(struct pci_dev *dev)
 {
 	if (!pci_msi_enable || !dev || !dev->msix_enabled)
 		return;
@@ -770,7 +770,8 @@ void pci_msix_shutdown(struct pci_dev* d
 	pci_intx_for_msi(dev, 1);
 	dev->msix_enabled = 0;
 }
-void pci_disable_msix(struct pci_dev* dev)
+
+void pci_disable_msix(struct pci_dev *dev)
 {
 	pci_msix_shutdown(dev);
 }
@@ -785,14 +786,14 @@ EXPORT_SYMBOL(pci_disable_msix);
  * allocated for this device function, are reclaimed to unused state,
  * which may be used later on.
  **/
-void msi_remove_pci_irq_vectors(struct pci_dev* dev)
+void msi_remove_pci_irq_vectors(struct pci_dev *dev)
 {
 	unsigned long flags;
 	struct msi_dev_list *msi_dev_entry;
 	struct msi_pirq_entry *pirq_entry, *tmp;
 
 	if (!pci_msi_enable || !dev)
- 		return;
+		return;
 
 	msi_dev_entry = get_msi_dev_pirq_list(dev);
 
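
Aside from the Xen frontend path, pci_enable_msix() keeps its usual contract: 0 on success, a negative errno on failure, and a positive count when fewer vectors are available than requested. A minimal caller sketch (the foo_* names and vector count are hypothetical):

#include <linux/pci.h>

#define FOO_NR_VECTORS 4	/* hypothetical vector count */

static int foo_setup_msix(struct pci_dev *pdev, struct msix_entry *entries)
{
	int i, err;

	for (i = 0; i < FOO_NR_VECTORS; i++)
		entries[i].entry = i;		/* MSI-X table slots we want */

	err = pci_enable_msix(pdev, entries, FOO_NR_VECTORS);
	if (err > 0)				/* only err vectors available */
		err = pci_enable_msix(pdev, entries, err);
	if (err)
		return err;			/* fall back to MSI or INTx */

	/* entries[i].vector now holds the IRQ to pass to request_irq() */
	return 0;
}
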
--- sle11sp1-2010-03-29.orig/drivers/pci/probe.c	2010-03-29 09:08:49.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/pci/probe.c	2010-03-29 09:09:32.000000000 +0200
@@ -1133,7 +1133,11 @@ int pci_scan_slot(struct pci_bus *bus, i
 	if (dev && !dev->is_added)	/* new device? */
 		nr++;
 
+#ifndef pcibios_scan_all_fns
 	if (dev && dev->multifunction) {
+#else
+	if (dev ? dev->multifunction : pcibios_scan_all_fns(bus, devfn)) {
+#endif
 		for (fn = 1; fn < 8; fn++) {
 			dev = pci_scan_single_device(bus, devfn + fn);
 			if (dev) {
--- sle11sp1-2010-03-29.orig/drivers/sfi/sfi_core.c	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/sfi/sfi_core.c	2009-12-16 11:53:57.000000000 +0100
@@ -387,6 +387,11 @@ void __init sfi_init(void)
 	if (!acpi_disabled)
 		disable_sfi();
 
+#ifdef CONFIG_XEN
+	if (!is_initial_xendomain())
+		disable_sfi();
+#endif
+
 	if (sfi_disabled)
 		return;
 
--- sle11sp1-2010-03-29.orig/drivers/staging/hv/Kconfig	2010-03-31 09:52:27.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/staging/hv/Kconfig	2009-11-06 10:52:23.000000000 +0100
@@ -1,6 +1,6 @@
 config HYPERV
 	tristate "Microsoft Hyper-V client drivers"
-	depends on X86 && m
+	depends on X86 && !XEN && m
 	default n
 	help
 	  Select this option to run Linux as a Hyper-V client operating
--- sle11sp1-2010-03-29.orig/drivers/xen/Kconfig	2009-12-18 12:27:52.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/Kconfig	2010-03-26 08:39:39.000000000 +0100
@@ -22,6 +22,7 @@ config XEN_UNPRIVILEGED_GUEST
 	select PM
 	select PM_SLEEP
 	select PM_SLEEP_SMP if SMP
+	select PM_RUNTIME if PCI
 	select SUSPEND
 
 config XEN_PRIVCMD
--- sle11sp1-2010-03-29.orig/drivers/xen/Makefile	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/Makefile	2009-11-06 10:52:23.000000000 +0100
@@ -10,6 +10,11 @@ obj-$(CONFIG_XEN)		+= evtchn/
 obj-y				+= xenbus/
 obj-$(CONFIG_XEN)		+= char/
 
+nostackp := $(call cc-option, -fno-stack-protector)
+ifeq ($(CONFIG_PARAVIRT_XEN),y)
+CFLAGS_features.o			:= $(nostackp)
+endif
+
 obj-$(CONFIG_XEN)		+= features.o util.o
 obj-$(CONFIG_HOTPLUG_CPU)	+= $(xen-hotplug-y)
 obj-$(CONFIG_XEN_XENCOMM)	+= $(xen-xencomm-y)
--- sle11sp1-2010-03-29.orig/drivers/xen/balloon/balloon.c	2010-02-02 14:56:12.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/balloon/balloon.c	2010-03-31 09:58:59.000000000 +0200
@@ -77,6 +77,11 @@ static DEFINE_MUTEX(balloon_mutex);
  */
 DEFINE_SPINLOCK(balloon_lock);
 
+#ifndef MODULE
+#include <linux/pagevec.h>
+static struct pagevec free_pagevec;
+#endif
+
 struct balloon_stats balloon_stats;
 
 /* We increase/decrease in batches which fit in a page */
@@ -200,14 +205,27 @@ static struct page *balloon_next_page(st
 static inline void balloon_free_page(struct page *page)
 {
 #ifndef MODULE
-	if (put_page_testzero(page))
-		free_cold_page(page);
+	if (put_page_testzero(page) && !pagevec_add(&free_pagevec, page)) {
+		__pagevec_free(&free_pagevec);
+		pagevec_reinit(&free_pagevec);
+	}
 #else
-	/* free_cold_page() is not being exported. */
+	/* pagevec interface is not being exported. */
 	__free_page(page);
 #endif
 }
 
+static inline void balloon_free_and_unlock(unsigned long flags)
+{
+#ifndef MODULE
+	if (pagevec_count(&free_pagevec)) {
+		__pagevec_free(&free_pagevec);
+		pagevec_reinit(&free_pagevec);
+	}
+#endif
+	balloon_unlock(flags);
+}
+
 static void balloon_alarm(unsigned long unused)
 {
 	schedule_work(&balloon_worker);
@@ -320,7 +338,7 @@ static int increase_reservation(unsigned
 	totalram_pages = bs.current_pages - totalram_bias;
 
  out:
-	balloon_unlock(flags);
+	balloon_free_and_unlock(flags);
 
 #ifndef MODULE
 	setup_per_zone_wmarks();
@@ -559,6 +577,7 @@ static int __init balloon_init(void)
 	IPRINTK("Initialising balloon driver.\n");
 
 #ifdef CONFIG_XEN
+	pagevec_init(&free_pagevec, true);
 	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
 	totalram_pages   = bs.current_pages;
 #else 
@@ -720,8 +739,8 @@ struct page **alloc_empty_pages_and_page
 		}
 
 		if (ret != 0) {
-			balloon_unlock(flags);
 			balloon_free_page(page);
+			balloon_free_and_unlock(flags);
 			goto err;
 		}
 
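
The balloon change batches frees through a pagevec instead of releasing one cold page at a time. pagevec_add() returns the space remaining, so a zero return means the vector just filled and should be drained; the new balloon_free_and_unlock() flushes any partial batch before dropping the lock. The same pattern in isolation, as a sketch against the 2.6.32 pagevec API:

#include <linux/mm.h>
#include <linux/pagevec.h>

static struct pagevec pvec;	/* pagevec_init(&pvec, 1) at setup for cold pages */

static void example_free_batched(struct page *page)
{
	/* pagevec_add() returns remaining space; 0 means "now full" */
	if (put_page_testzero(page) && !pagevec_add(&pvec, page)) {
		__pagevec_free(&pvec);	/* release the whole batch at once */
		pagevec_reinit(&pvec);	/* empty the vector for reuse */
	}
}

static void example_drain(void)
{
	if (pagevec_count(&pvec)) {	/* flush any partial batch */
		__pagevec_free(&pvec);
		pagevec_reinit(&pvec);
	}
}
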
--- sle11sp1-2010-03-29.orig/drivers/xen/blkfront/vbd.c	2010-01-18 16:54:23.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/blkfront/vbd.c	2010-01-18 16:54:56.000000000 +0100
@@ -105,7 +105,7 @@ static struct xlbd_major_info *major_inf
 
 #define XLBD_MAJOR_VBD_ALT(idx) ((idx) ^ XLBD_MAJOR_VBD_START ^ (XLBD_MAJOR_VBD_START + 1))
 
-static struct block_device_operations xlvbd_block_fops =
+static const struct block_device_operations xlvbd_block_fops =
 {
 	.owner = THIS_MODULE,
 	.open = blkif_open,
--- sle11sp1-2010-03-29.orig/drivers/xen/blktap2/device.c	2010-03-01 14:34:58.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/blktap2/device.c	2009-11-06 10:52:23.000000000 +0100
@@ -141,7 +141,7 @@ blktap_device_ioctl(struct block_device 
 	return 0;
 }
 
-static struct block_device_operations blktap_device_file_operations = {
+static const struct block_device_operations blktap_device_file_operations = {
 	.owner     = THIS_MODULE,
 	.open      = blktap_device_open,
 	.release   = blktap_device_release,
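
Both block-driver hunks only constify the operations table, which 2.6.32 permits now that the block layer takes a const pointer; the structure can then live in read-only data. Reduced to a sketch (the example_* handlers are hypothetical, with the 2.6.32 signatures):

#include <linux/blkdev.h>
#include <linux/module.h>

static int example_open(struct block_device *bdev, fmode_t mode);
static int example_release(struct gendisk *disk, fmode_t mode);

static const struct block_device_operations example_fops = {
	.owner   = THIS_MODULE,
	.open    = example_open,
	.release = example_release,
};
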
--- sle11sp1-2010-03-29.orig/drivers/xen/core/evtchn.c	2010-02-09 17:06:02.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/core/evtchn.c	2009-11-06 11:04:38.000000000 +0100
@@ -138,13 +138,13 @@ static inline unsigned int type_from_irq
 }
 
 /* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
+DEFINE_PER_CPU(int[NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
 
 /* IRQ <-> IPI mapping. */
 #ifndef NR_IPIS
 #define NR_IPIS 1
 #endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
+DEFINE_PER_CPU(int[NR_IPIS], ipi_to_irq) = {[0 ... NR_IPIS-1] = -1};
 
 #ifdef CONFIG_SMP
 
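
The per-CPU hunk follows a 2.6.32 interface change: array variables are now declared as DEFINE_PER_CPU(type[N], name), with the array bound attached to the type argument rather than to the name. Access through per_cpu() is unchanged. In isolation (the NR_VIRQS value here is a placeholder):

#include <linux/percpu.h>

#define NR_VIRQS 8	/* placeholder; the real value comes from the Xen headers */

/* 2.6.32 style: the array type is the first macro argument */
static DEFINE_PER_CPU(int[NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};

static int example_lookup(unsigned int cpu, unsigned int virq)
{
	return per_cpu(virq_to_irq, cpu)[virq];	/* indexing works as before */
}
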
--- sle11sp1-2010-03-29.orig/drivers/xen/core/reboot.c	2009-11-06 10:51:42.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/core/reboot.c	2010-03-15 12:13:22.000000000 +0100
@@ -83,7 +83,7 @@ static int xen_suspend(void *__unused)
 	int err, old_state;
 
 	daemonize("suspend");
-	err = set_cpus_allowed(current, cpumask_of_cpu(0));
+	err = set_cpus_allowed_ptr(current, cpumask_of(0));
 	if (err) {
 		printk(KERN_ERR "Xen suspend can't run on CPU0 (%d)\n", err);
 		goto fail;
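
set_cpus_allowed() passed a full cpumask_t by value, which does not scale with large NR_CPUS, so 2.6.32 code hands a pointer to set_cpus_allowed_ptr() and builds single-CPU masks with cpumask_of(). Reduced to a sketch:

#include <linux/cpumask.h>
#include <linux/sched.h>

static int example_pin_to_cpu0(void)
{
	/* old: set_cpus_allowed(current, cpumask_of_cpu(0)), mask by value */
	return set_cpus_allowed_ptr(current, cpumask_of(0));
}
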
--- sle11sp1-2010-03-29.orig/drivers/xen/netback/interface.c	2010-01-04 12:40:53.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/netback/interface.c	2010-01-04 12:42:38.000000000 +0100
@@ -159,7 +159,7 @@ static void netbk_get_strings(struct net
 	}
 }
 
-static struct ethtool_ops network_ethtool_ops =
+static const struct ethtool_ops network_ethtool_ops =
 {
 	.get_drvinfo = netbk_get_drvinfo,
 
--- sle11sp1-2010-03-29.orig/drivers/xen/netback/loopback.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/netback/loopback.c	2009-11-06 10:52:23.000000000 +0100
@@ -134,7 +134,7 @@ static int loopback_start_xmit(struct sk
 	if (!skb_remove_foreign_references(skb)) {
 		np->stats.tx_dropped++;
 		dev_kfree_skb(skb);
-		return 0;
+		return NETDEV_TX_OK;
 	}
 
 	dst_release(skb_dst(skb));
@@ -173,7 +173,7 @@ static int loopback_start_xmit(struct sk
 
 	netif_rx(skb);
 
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static struct net_device_stats *loopback_get_stats(struct net_device *dev)
@@ -182,7 +182,7 @@ static struct net_device_stats *loopback
 	return &np->stats;
 }
 
-static struct ethtool_ops network_ethtool_ops =
+static const struct ethtool_ops network_ethtool_ops =
 {
 	.get_tx_csum = ethtool_op_get_tx_csum,
 	.set_tx_csum = ethtool_op_set_tx_csum,
--- sle11sp1-2010-03-29.orig/drivers/xen/netback/netback.c	2010-01-04 12:40:57.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/netback/netback.c	2009-11-06 10:52:23.000000000 +0100
@@ -340,12 +340,12 @@ int netif_be_start_xmit(struct sk_buff *
 	skb_queue_tail(&rx_queue, skb);
 	tasklet_schedule(&net_rx_tasklet);
 
-	return 0;
+	return NETDEV_TX_OK;
 
  drop:
 	netif->stats.tx_dropped++;
 	dev_kfree_skb(skb);
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 #if 0
--- sle11sp1-2010-03-29.orig/drivers/xen/netfront/netfront.c	2009-11-06 10:52:09.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/netfront/netfront.c	2009-11-06 10:52:23.000000000 +0100
@@ -953,7 +953,7 @@ static int network_start_xmit(struct sk_
  	if (np->accel_vif_state.hooks && 
  	    np->accel_vif_state.hooks->start_xmit(skb, dev)) { 
  		/* Fast path has sent this packet */ 
- 		return 0; 
+ 		return NETDEV_TX_OK;
  	} 
 
 	frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
@@ -1042,12 +1042,12 @@ static int network_start_xmit(struct sk_
 
 	spin_unlock_irq(&np->tx_lock);
 
-	return 0;
+	return NETDEV_TX_OK;
 
  drop:
 	np->stats.tx_dropped++;
 	dev_kfree_skb(skb);
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 static irqreturn_t netif_int(int irq, void *dev_id)
@@ -1872,7 +1872,7 @@ static void netif_uninit(struct net_devi
 	gnttab_free_grant_references(np->gref_rx_head);
 }
 
-static struct ethtool_ops network_ethtool_ops =
+static const struct ethtool_ops network_ethtool_ops =
 {
 	.get_tx_csum = ethtool_op_get_tx_csum,
 	.set_tx_csum = ethtool_op_set_tx_csum,
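
These transmit-path hunks adopt the explicit NETDEV_TX_* return codes: a start_xmit handler returns NETDEV_TX_OK whenever it has consumed the skb, even if it dropped and freed it, and NETDEV_TX_BUSY only when the stack should requeue the untouched skb. A minimal handler sketch (the example_* helpers are hypothetical):

#include <linux/netdevice.h>

static bool example_queue_full(struct net_device *dev);	/* hypothetical */
static void example_hw_xmit(struct sk_buff *skb);		/* hypothetical */

static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	if (example_queue_full(dev))
		return NETDEV_TX_BUSY;	/* skb untouched; stack retries */

	if (skb->len == 0) {		/* stand-in for a validity check */
		dev->stats.tx_dropped++;
		dev_kfree_skb(skb);
		return NETDEV_TX_OK;	/* consumed, even though dropped */
	}

	example_hw_xmit(skb);
	return NETDEV_TX_OK;		/* skb consumed */
}
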
--- sle11sp1-2010-03-29.orig/drivers/xen/sfc_netback/accel_fwd.c	2009-11-06 10:51:17.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/sfc_netback/accel_fwd.c	2009-11-06 10:52:23.000000000 +0100
@@ -181,11 +181,10 @@ int netback_accel_fwd_add(const __u8 *ma
 	unsigned long flags;
 	cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
 	struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
-	DECLARE_MAC_BUF(buf);
 
 	BUG_ON(fwd_priv == NULL);
 
-	DPRINTK("Adding mac %s\n", print_mac(buf, mac));
+	DPRINTK("Adding mac %pM\n", mac);
        
 	spin_lock_irqsave(&fwd_set->fwd_lock, flags);
 	
@@ -200,8 +199,7 @@ int netback_accel_fwd_add(const __u8 *ma
 	if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table,
 			       (cuckoo_hash_key *)(&key), &rc) != 0) {
 		spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
-		EPRINTK("MAC address %s already accelerated.\n",
-			print_mac(buf, mac));
+		EPRINTK("MAC address %pM already accelerated.\n", mac);
 		return -EEXIST;
 	}
 
@@ -236,9 +234,8 @@ void netback_accel_fwd_remove(const __u8
 	unsigned long flags;
 	cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
 	struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
-	DECLARE_MAC_BUF(buf);
 
-	DPRINTK("Removing mac %s\n", print_mac(buf, mac));
+	DPRINTK("Removing mac %pM\n", mac);
 
 	BUG_ON(fwd_priv == NULL);
 
@@ -396,16 +393,14 @@ void netback_accel_tx_packet(struct sk_b
 
 	if (is_broadcast_ether_addr(skb_mac_header(skb))
 	    && packet_is_arp_reply(skb)) {
-		DECLARE_MAC_BUF(buf);
-
 		/*
 		 * update our fast path forwarding to reflect this
 		 * gratuitous ARP
 		 */ 
 		mac = skb_mac_header(skb)+ETH_ALEN;
 
-		DPRINTK("%s: found gratuitous ARP for %s\n",
-			__FUNCTION__, print_mac(buf, mac));
+		DPRINTK("%s: found gratuitous ARP for %pM\n",
+			__FUNCTION__, mac);
 
 		spin_lock_irqsave(&fwd_set->fwd_lock, flags);
 		/*
--- sle11sp1-2010-03-29.orig/drivers/xen/sfc_netback/accel_msg.c	2009-11-06 10:51:17.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/sfc_netback/accel_msg.c	2009-11-06 10:52:23.000000000 +0100
@@ -57,11 +57,10 @@ static void netback_accel_msg_tx_localma
 {
 	unsigned long lock_state;
 	struct net_accel_msg *msg;
-	DECLARE_MAC_BUF(buf);
 
 	BUG_ON(bend == NULL || mac == NULL);
 
-	VPRINTK("Sending local mac message: %s\n", print_mac(buf, mac));
+	VPRINTK("Sending local mac message: %pM\n", mac);
 	
 	msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU,
 				       &lock_state);
--- sle11sp1-2010-03-29.orig/drivers/xen/sfc_netfront/accel_msg.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/sfc_netfront/accel_msg.c	2009-11-06 10:52:23.000000000 +0100
@@ -327,10 +327,8 @@ static int vnic_process_localmac_msg(net
 	cuckoo_hash_mac_key key;
 
 	if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) {
-		DECLARE_MAC_BUF(buf);
-
-		DPRINTK("MAC has moved, could be local: %s\n",
-			print_mac(buf, msg->u.localmac.mac));
+		DPRINTK("MAC has moved, could be local: %pM\n",
+			msg->u.localmac.mac);
 		key = cuckoo_mac_to_key(msg->u.localmac.mac);
 		spin_lock_irqsave(&vnic->table_lock, flags);
 		/* Try to remove it, not a big deal if not there */
--- sle11sp1-2010-03-29.orig/drivers/xen/sfc_netfront/accel_vi.c	2010-01-18 16:19:18.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/sfc_netfront/accel_vi.c	2010-01-18 16:55:03.000000000 +0100
@@ -643,10 +643,7 @@ netfront_accel_vi_tx_post(netfront_accel
 					  (cuckoo_hash_key *)(&key), &value);
 
 	if (!try_fastpath) {
-		DECLARE_MAC_BUF(buf);
-
-		VPRINTK("try fast path false for mac: %s\n",
-			print_mac(buf, skb->data));
+		VPRINTK("try fast path false for mac: %pM\n", skb->data);
 		
 		return NETFRONT_ACCEL_STATUS_CANT;
 	}
@@ -772,10 +769,9 @@ static void  netfront_accel_vi_rx_comple
 	if (compare_ether_addr(skb->data, vnic->mac)) {
 		struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
 		u16 port;
-		DECLARE_MAC_BUF(buf);
 
-		DPRINTK("%s: saw wrong MAC address %s\n",
-			__FUNCTION__, print_mac(buf, skb->data));
+		DPRINTK("%s: saw wrong MAC address %pM\n",
+			__FUNCTION__, skb->data);
 
 		if (ip->protocol == IPPROTO_TCP) {
 			struct tcphdr *tcp = (struct tcphdr *)
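
All of the sfc_netback/netfront hunks retire the DECLARE_MAC_BUF()/print_mac() pair in favour of the %pM printk extension, which formats a six-byte MAC address straight from the pointer. In isolation:

#include <linux/kernel.h>
#include <linux/if_ether.h>

static void example_log_mac(const u8 *mac)
{
	/* old: DECLARE_MAC_BUF(buf); printk("%s\n", print_mac(buf, mac)); */
	printk(KERN_DEBUG "MAC %pM\n", mac);	/* prints aa:bb:cc:dd:ee:ff */
}
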
--- sle11sp1-2010-03-29.orig/drivers/xen/xenbus/xenbus_dev.c	2009-05-29 10:25:53.000000000 +0200
+++ sle11sp1-2010-03-29/drivers/xen/xenbus/xenbus_dev.c	2009-11-06 10:52:23.000000000 +0100
@@ -36,6 +36,7 @@
 #include <linux/errno.h>
 #include <linux/uio.h>
 #include <linux/notifier.h>
+#include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/fs.h>
 #include <linux/poll.h>
--- sle11sp1-2010-03-29.orig/drivers/xen/xenbus/xenbus_probe.c	2009-12-04 11:30:36.000000000 +0100
+++ sle11sp1-2010-03-29/drivers/xen/xenbus/xenbus_probe.c	2009-11-06 10:52:23.000000000 +0100
@@ -42,6 +42,7 @@
 #include <linux/ctype.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/proc_fs.h>
 #include <linux/notifier.h>
 #include <linux/mutex.h>
--- sle11sp1-2010-03-29.orig/lib/swiotlb-xen.c	2009-12-14 17:30:30.000000000 +0100
+++ sle11sp1-2010-03-29/lib/swiotlb-xen.c	2009-12-14 17:36:28.000000000 +0100
@@ -111,79 +111,11 @@ setup_io_tlb_npages(char *str)
 __setup("swiotlb=", setup_io_tlb_npages);
 /* make io_tlb_overflow tunable too? */
 
-void *__init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
-{
-	void *start = alloc_bootmem_pages(size);
-	unsigned int i;
-	int rc;
-
-	dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
-	for (i = 0; i < nslabs; i += IO_TLB_SEGSIZE) {
-		do {
-			rc = xen_create_contiguous_region(
-				(unsigned long)start + (i << IO_TLB_SHIFT),
-				get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
-				dma_bits);
-		} while (rc && dma_bits++ < max_dma_bits);
-		if (rc) {
-			if (i == 0)
-				panic("No suitable physical memory available for SWIOTLB buffer!\n"
-				      "Use dom0_mem Xen boot parameter to reserve\n"
-				      "some DMA memory (e.g., dom0_mem=-128M).\n");
-			io_tlb_nslabs = i;
-			i <<= IO_TLB_SHIFT;
-			free_bootmem(__pa(start + i), size - i);
-			size = i;
-			for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
-				unsigned int bits = fls64(virt_to_bus(start + i - 1));
-
-				if (bits > dma_bits)
-					dma_bits = bits;
-			}
-			break;
-		}
-	}
-
-	return start;
-}
-
-#ifndef CONFIG_XEN
-void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
-{
-	return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
-}
-#endif
-
-dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
-{
-	return phys_to_machine(paddr);
-}
-
-phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
-{
-	return machine_to_phys(baddr);
-}
-
+/* Note that this doesn't work with highmem pages */

 static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
 				      volatile void *address)
 {
-	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
-}
-
-void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
-{
-	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
-}
-
-int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
-					       dma_addr_t addr, size_t size)
-{
-	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
-}
-
-int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
-{
-	return 0;
+	return phys_to_dma(hwdev, virt_to_phys(address));
 }
 
 static void swiotlb_print_info(unsigned long bytes)
@@ -216,10 +148,35 @@ swiotlb_init_with_default_size(size_t de
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
+	io_tlb_start = alloc_bootmem_pages(bytes);
 	if (!io_tlb_start)
 		panic("Cannot allocate SWIOTLB buffer!\n");
-	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+	dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
+	for (i = 0; i < io_tlb_nslabs; i += IO_TLB_SEGSIZE) {
+		do {
+			rc = xen_create_contiguous_region(
+				(unsigned long)io_tlb_start + (i << IO_TLB_SHIFT),
+				get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
+				dma_bits);
+		} while (rc && dma_bits++ < max_dma_bits);
+		if (rc) {
+			if (i == 0)
+				panic("No suitable physical memory available for SWIOTLB buffer!\n"
+				      "Use dom0_mem Xen boot parameter to reserve\n"
+				      "some DMA memory (e.g., dom0_mem=-128M).\n");
+			io_tlb_nslabs = i;
+			i <<= IO_TLB_SHIFT;
+			free_bootmem(__pa(io_tlb_start + i), bytes - i);
+			bytes = i;
+			for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
+				unsigned int bits = fls64(virt_to_bus(io_tlb_start + i - 1));
+
+				if (bits > dma_bits)
+					dma_bits = bits;
+			}
+			break;
+		}
+	}
 	io_tlb_end = io_tlb_start + bytes;
 
 	/*
@@ -283,13 +240,10 @@ static inline int range_needs_mapping(ph
 static int is_swiotlb_buffer(dma_addr_t addr)
 {
 	unsigned long pfn = mfn_to_local_pfn(PFN_DOWN(addr));
-	char *va = pfn_valid(pfn) ? __va(pfn << PAGE_SHIFT) : NULL;
+	phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
 
-#ifdef CONFIG_HIGHMEM
-	if (pfn >= highstart_pfn)
-		return 0;
-#endif
-	return va >= io_tlb_start && va < io_tlb_end;
+	return paddr >= virt_to_phys(io_tlb_start) &&
+		paddr < virt_to_phys(io_tlb_end);
 }
 
 /*
@@ -514,12 +468,15 @@ swiotlb_full(struct device *dev, size_t 
 	printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at "
 	       "device %s\n", size, dev ? dev_name(dev) : "?");
 
-	if (size > io_tlb_overflow && do_panic) {
-		if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Memory would be corrupted\n");
-		if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
-			panic("PCI-DMA: Random memory would be DMAed\n");
-	}
+	if (size <= io_tlb_overflow || !do_panic)
+		return;
+
+	if (dir == DMA_BIDIRECTIONAL)
+		panic("DMA: Random memory could be DMA accessed\n");
+	if (dir == DMA_FROM_DEVICE)
+		panic("DMA: Random memory could be DMA written\n");
+	if (dir == DMA_TO_DEVICE)
+		panic("DMA: Random memory could be DMA read\n");
 }
 
 /*
@@ -545,7 +502,7 @@ dma_addr_t swiotlb_map_page(struct devic
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(dev, dev_addr, size) &&
+	if (dma_capable(dev, dev_addr, size) &&
 	    !range_needs_mapping(phys, size))
 		return dev_addr;
 
@@ -575,12 +532,12 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
 			 size_t size, int dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(dev_addr)) {
-		do_unmap_single(hwdev, dma_addr, size, dir);
+		do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
 		return;
 	}
 
@@ -609,12 +566,12 @@ void
 swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
 			    size_t size, enum dma_data_direction dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(dev_addr))
-		sync_single(hwdev, dma_addr, size, dir);
+		sync_single(hwdev, phys_to_virt(paddr), size, dir);
 }
 EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
 
@@ -622,12 +579,12 @@ void
 swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
 			       size_t size, enum dma_data_direction dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+	phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
 
 	if (is_swiotlb_buffer(dev_addr))
-		sync_single(hwdev, dma_addr, size, dir);
+		sync_single(hwdev, phys_to_virt(paddr), size, dir);
 }
 EXPORT_SYMBOL(swiotlb_sync_single_for_device);
 
@@ -680,8 +637,8 @@ swiotlb_map_sg_attrs(struct device *hwde
 		phys_addr_t paddr = page_to_pseudophys(sg_page(sg))
 				   + sg->offset;
 
-		if (range_needs_mapping(paddr, sg->length)
-		    || address_needs_mapping(hwdev, dev_addr, sg->length)) {
+		if (range_needs_mapping(paddr, sg->length) ||
+		    !dma_capable(hwdev, dev_addr, sg->length)) {
 			void *map;
 
 			gnttab_dma_unmap_page(dev_addr);