Sophie

Sophie

distrib > Mandriva > current > x86_64 > by-pkgid > d41cc2729f5f0fbbd2607b14a49457f3 > files > 94

kernel-xen-2.6.32.11-2mdv2010.1.src.rpm

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.31
Patch-mainline: 2.6.31

 This patch contains the differences between 2.6.30 and 2.6.31.

Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.31" by xen-port-patches.py

--- sle11sp1-2010-03-22.orig/arch/x86/Kconfig	2010-01-27 14:45:20.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/Kconfig	2010-02-09 17:06:32.000000000 +0100
@@ -24,7 +24,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
-	select HAVE_PERF_EVENTS if (!M386 && !M486)
+	select HAVE_PERF_EVENTS if (!M386 && !M486 && !XEN)
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -902,7 +902,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 
 config X86_MCE
 	bool "Machine Check / overheating reporting"
-	depends on !X86_XEN && !XEN_UNPRIVILEGED_GUEST
+	depends on !XEN_UNPRIVILEGED_GUEST
 	---help---
 	  Machine Check support allows the processor to notify the
 	  kernel if it detects a problem (e.g. overheating, data corruption).
@@ -935,7 +935,7 @@ config X86_MCE_AMD
 
 config X86_ANCIENT_MCE
 	def_bool n
-	depends on X86_32 && X86_MCE
+	depends on X86_32 && X86_MCE && !XEN
 	prompt "Support for old Pentium 5 / WinChip machine checks"
 	---help---
 	  Include support for machine check handling on old Pentium 5 or WinChip
@@ -1617,6 +1617,7 @@ config KEXEC_JUMP
 
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP || XEN)
+	default 0x100000 if XEN
 	default "0x1000000"
 	---help---
 	  This gives the physical address where the kernel is loaded.
--- sle11sp1-2010-03-22.orig/arch/x86/ia32/ia32entry-xen.S	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/ia32/ia32entry-xen.S	2009-11-06 10:52:09.000000000 +0100
@@ -770,9 +770,11 @@ ia32_sys_call_table:
 	.quad compat_sys_signalfd4
 	.quad sys_eventfd2
 	.quad sys_epoll_create1
-	.quad sys_dup3			/* 330 */
+	.quad sys_dup3				/* 330 */
 	.quad sys_pipe2
 	.quad sys_inotify_init1
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
+	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
+	.quad sys_perf_counter_open
 ia32_syscall_end:
--- sle11sp1-2010-03-22.orig/arch/x86/include/asm/hw_irq.h	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/asm/hw_irq.h	2009-11-06 10:52:09.000000000 +0100
@@ -121,6 +121,7 @@ extern asmlinkage void smp_invalidate_in
 extern irqreturn_t smp_reschedule_interrupt(int, void *);
 extern irqreturn_t smp_call_function_interrupt(int, void *);
 extern irqreturn_t smp_call_function_single_interrupt(int, void *);
+extern irqreturn_t smp_reboot_interrupt(int, void *);
 #endif
 #endif
 
--- sle11sp1-2010-03-22.orig/arch/x86/include/asm/required-features.h	2010-03-22 12:07:54.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/asm/required-features.h	2009-11-20 11:04:37.000000000 +0100
@@ -48,7 +48,7 @@
 #endif
 
 #ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT
+#if defined(CONFIG_PARAVIRT) || defined(CONFIG_XEN)
 /* Paravirtualized systems may not have PSE or PGE available */
 #define NEED_PSE	0
 #define NEED_PGE	0
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/agp.h	2009-11-06 10:51:47.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/agp.h	2009-11-06 10:52:09.000000000 +0100
@@ -31,6 +31,7 @@
 /* Convert a physical address to an address suitable for the GART. */
 #define phys_to_gart(x) phys_to_machine(x)
 #define gart_to_phys(x) machine_to_phys(x)
+#define page_to_gart(x) phys_to_gart(page_to_pseudophys(x))
 
 /* GATT allocation. Returns/accepts GATT kernel virtual address. */
 #define alloc_gatt_pages(order)	({                                          \
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/desc.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/desc.h	2009-11-06 10:52:09.000000000 +0100
@@ -1,7 +1,6 @@
 #ifndef _ASM_X86_DESC_H
 #define _ASM_X86_DESC_H
 
-#ifndef __ASSEMBLY__
 #include <asm/desc_defs.h>
 #include <asm/ldt.h>
 #include <asm/mmu.h>
@@ -406,29 +405,4 @@ static inline void set_system_intr_gate_
 }
 #endif
 
-#else
-/*
- * GET_DESC_BASE reads the descriptor base of the specified segment.
- *
- * Args:
- *    idx - descriptor index
- *    gdt - GDT pointer
- *    base - 32bit register to which the base will be written
- *    lo_w - lo word of the "base" register
- *    lo_b - lo byte of the "base" register
- *    hi_b - hi byte of the low word of the "base" register
- *
- * Example:
- *    GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
- *    Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
- */
-#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
-	movb idx * 8 + 4(gdt), lo_b;			\
-	movb idx * 8 + 7(gdt), hi_b;			\
-	shll $16, base;					\
-	movw idx * 8 + 2(gdt), lo_w;
-
-
-#endif /* __ASSEMBLY__ */
-
 #endif /* _ASM_X86_DESC_H */
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/fixmap.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/fixmap.h	2009-11-06 10:52:09.000000000 +0100
@@ -118,12 +118,9 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT
 	FIX_PARAVIRT_BOOTMAP,
 #endif
-	FIX_TEXT_POKE0,	/* reserve 2 pages for text_poke() */
-	FIX_TEXT_POKE1,
+	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
+	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
 	__end_of_permanent_fixed_addresses,
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
-	FIX_OHCI1394_BASE,
-#endif
 	/*
 	 * 256 temporary boot-time mappings, used by early_ioremap(),
 	 * before ioremap() is functional.
@@ -136,6 +133,9 @@ enum fixed_addresses {
 	FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
 			(__end_of_permanent_fixed_addresses & 255),
 	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+	FIX_OHCI1394_BASE,
+#endif
 #ifdef CONFIG_X86_32
 	FIX_WP_TEST,
 #endif
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/irq_vectors.h	2009-12-22 13:20:34.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/irq_vectors.h	2009-11-06 10:52:09.000000000 +0100
@@ -1,8 +1,11 @@
 #ifndef _ASM_X86_IRQ_VECTORS_H
 #define _ASM_X86_IRQ_VECTORS_H
 
+#define MCE_VECTOR			0x12
+
 #ifdef CONFIG_X86_32
 # define SYSCALL_VECTOR			0x80
+# define IA32_SYSCALL_VECTOR		0x80
 #else
 # define IA32_SYSCALL_VECTOR		0x80
 #endif
@@ -11,7 +14,8 @@
 #define CALL_FUNCTION_VECTOR		1
 #define CALL_FUNC_SINGLE_VECTOR		2
 #define SPIN_UNLOCK_VECTOR		3
-#define NR_IPIS				4
+#define REBOOT_VECTOR			4
+#define NR_IPIS				5
 
 /*
  * The maximum number of vectors supported by i386 processors
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/pci.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/pci.h	2009-11-06 10:52:09.000000000 +0100
@@ -97,7 +97,8 @@ extern void pci_iommu_alloc(void);
 
 #define PCI_DMA_BUS_IS_PHYS 0
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG) || defined(CONFIG_SWIOTLB)
+#if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG) \
+	|| defined(CONFIG_SWIOTLB)
 
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)       \
 	        dma_addr_t ADDR_NAME;
@@ -136,6 +137,7 @@ extern void pci_iommu_alloc(void);
 
 /* generic pci stuff */
 #include <asm-generic/pci.h>
+#define PCIBIOS_MAX_MEM_32 0xffffffff
 
 #ifdef CONFIG_NUMA
 /* Returns the node based on pci bus */
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/pgalloc.h	2009-11-06 10:51:47.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/pgalloc.h	2009-11-06 10:52:09.000000000 +0100
@@ -51,7 +51,13 @@ static inline void pte_free(struct mm_st
 	__pte_free(pte);
 }
 
-extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+				  unsigned long address)
+{
+	___pte_free_tlb(tlb, pte);
+}
 
 static inline void pmd_populate_kernel(struct mm_struct *mm,
 				       pmd_t *pmd, pte_t *pte)
@@ -92,7 +98,13 @@ static inline void pmd_free(struct mm_st
 	__pmd_free(virt_to_page(pmd));
 }
 
-extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				  unsigned long adddress)
+{
+	___pmd_free_tlb(tlb, pmd);
+}
 
 #ifdef CONFIG_X86_PAE
 extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
@@ -145,7 +157,14 @@ static inline void pud_free(struct mm_st
 	__pmd_free(virt_to_page(pud));
 }
 
-extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
+extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
+
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+				  unsigned long address)
+{
+	___pud_free_tlb(tlb, pud);
+}
+
 #endif	/* PAGETABLE_LEVELS > 3 */
 #endif	/* PAGETABLE_LEVELS > 2 */
 
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/pgtable.h	2009-11-20 11:15:54.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/pgtable.h	2009-11-20 11:17:56.000000000 +0100
@@ -2,6 +2,7 @@
 #define _ASM_X86_PGTABLE_H
 
 #include <asm/page.h>
+#include <asm/e820.h>
 
 #include <asm/pgtable_types.h>
 
@@ -78,6 +79,8 @@ static inline void __init paravirt_paget
 #define pte_val(x)	xen_pte_val(x)
 #define __pte(x)	xen_make_pte(x)
 
+#define arch_end_context_switch(prev)	do {} while(0)
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -265,10 +268,17 @@ static inline pgprot_t pgprot_modify(pgp
 
 #define canon_pgprot(p) __pgprot(massage_pgprot(p))
 
-static inline int is_new_memtype_allowed(unsigned long flags,
-						unsigned long new_flags)
+static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
+					 unsigned long flags,
+					 unsigned long new_flags)
 {
 	/*
+	 * PAT type is always WB for ISA. So no need to check.
+	 */
+	if (is_ISA_range(paddr, paddr + size - 1))
+		return 1;
+
+	/*
 	 * Certain new memtypes are not allowed with certain
 	 * requested memtype:
 	 * - request is uncached, return cannot be write-back
@@ -313,6 +323,11 @@ static inline int pte_present(pte_t a)
 	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
 }
 
+static inline int pte_hidden(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_HIDDEN;
+}
+
 static inline int pmd_present(pmd_t pmd)
 {
 #if CONFIG_XEN_COMPAT <= 0x030002
@@ -512,6 +527,8 @@ static inline int pgd_none(pgd_t pgd)
 
 #ifndef __ASSEMBLY__
 
+#define direct_gbpages 0
+
 /* local pte updates need not use xchg for locking */
 static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res)
 {
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/pgtable_32.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/pgtable_32.h	2009-11-06 10:52:09.000000000 +0100
@@ -48,13 +48,17 @@ extern void set_pmd_pfn(unsigned long, u
 #endif
 
 #if defined(CONFIG_HIGHPTE)
+#define __KM_PTE			\
+	(in_nmi() ? KM_NMI_PTE : 	\
+	 in_irq() ? KM_IRQ_PTE :	\
+	 KM_PTE0)
 #define pte_offset_map(dir, address)					\
-	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) +		\
+	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) +		\
 	 pte_index((address)))
 #define pte_offset_map_nested(dir, address)				\
 	((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) +		\
 	 pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
+#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
 #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
 #else
 #define pte_offset_map(dir, address)					\
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/pgtable_64.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/pgtable_64.h	2009-11-06 10:52:09.000000000 +0100
@@ -33,10 +33,6 @@ extern pgd_t init_level4_pgt[];
 
 extern void paging_init(void);
 
-#endif /* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
-
 #define pte_ERROR(e)							\
 	printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n",		\
 	       __FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
@@ -142,8 +138,6 @@ static inline int pgd_large(pgd_t pgd) {
 
 #define update_mmu_cache(vma, address, pte) do { } while (0)
 
-#define direct_gbpages 0
-
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
@@ -178,10 +172,7 @@ extern void cleanup_highmap(void);
 
 /* fs/proc/kcore.c */
 #define	kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
-#define	kc_offset_to_vaddr(o)				\
-	(((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1)))	\
-	 ? ((o) | ~__VIRTUAL_MASK)			\
-	 : (o))
+#define	kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK)
 
 #define __HAVE_ARCH_PTE_SAME
 #endif /* !__ASSEMBLY__ */
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/pgtable_64_types.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/pgtable_64_types.h	2009-11-06 10:52:09.000000000 +0100
@@ -51,11 +51,12 @@ typedef union { pteval_t pte; unsigned i
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define MAX_PHYSMEM_BITS 43
 #define MAXMEM		 _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-#define VMALLOC_START    _AC(0xffffc20000000000, UL)
-#define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
-#define VMEMMAP_START	 _AC(0xffffe20000000000, UL)
+#define VMALLOC_START    _AC(0xffffc90000000000, UL)
+#define VMALLOC_END      _AC(0xffffe8ffffffffff, UL)
+#define VMEMMAP_START	 _AC(0xffffea0000000000, UL)
 #define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/pgtable_types.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/pgtable_types.h	2009-11-06 10:52:09.000000000 +0100
@@ -18,7 +18,7 @@
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
 #define _PAGE_BIT_UNUSED1	9	/* available for programmer */
 #define _PAGE_BIT_IOMAP		10	/* flag used to indicate IO mapping */
-#define _PAGE_BIT_UNUSED3	11
+#define _PAGE_BIT_HIDDEN	11	/* hidden by kmemcheck */
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
 #define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1
 #define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1
@@ -41,13 +41,18 @@
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
 #define _PAGE_UNUSED1	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
 #define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
-#define _PAGE_UNUSED3	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
 #define _PAGE_SPECIAL	(_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
 #define _PAGE_CPA_TEST	(_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
 #define __HAVE_ARCH_PTE_SPECIAL
 
+#ifdef CONFIG_KMEMCHECK
+#define _PAGE_HIDDEN	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#else
+#define _PAGE_HIDDEN	(_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #else
@@ -330,7 +335,6 @@ typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
 extern int nx_enabled;
-extern void set_nx(void);
 
 #define pgprot_writecombine	pgprot_writecombine
 extern pgprot_t pgprot_writecombine(pgprot_t prot);
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/processor.h	2010-03-17 14:36:39.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/processor.h	2009-11-06 10:52:09.000000000 +0100
@@ -136,7 +136,8 @@ struct cpuinfo_x86 {
 extern struct cpuinfo_x86	boot_cpu_data;
 extern struct cpuinfo_x86	new_cpu_data;
 
-extern __u32			cleared_cpu_caps[NCAPINTS];
+extern __u32			cpu_caps_cleared[NCAPINTS];
+extern __u32			cpu_caps_set[NCAPINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
@@ -417,9 +418,6 @@ DECLARE_PER_CPU(unsigned long, stack_can
 extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
 
 struct thread_struct {
 	/* Cached TLS descriptors: */
@@ -435,8 +433,12 @@ struct thread_struct {
 	unsigned short		fsindex;
 	unsigned short		gsindex;
 #endif
+#ifdef CONFIG_X86_32
 	unsigned long		ip;
+#endif
+#ifdef CONFIG_X86_64
 	unsigned long		fs;
+#endif
 	unsigned long		gs;
 	/* Hardware debugging registers: */
 	unsigned long		debugreg0;
@@ -465,14 +467,8 @@ struct thread_struct {
 	unsigned		io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
 	unsigned long	debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+	/* Debug Store context; see asm/ds.h */
 	struct ds_context	*ds_ctx;
-#endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-	unsigned int	bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long xen_get_debugreg(int regno)
@@ -742,6 +738,21 @@ static inline unsigned long get_debugctl
     return debugctlmsr;
 }
 
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+	u64 debugctlmsr = 0;
+	u32 val1, val2;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return 0;
+#endif
+	rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+	debugctlmsr = val1 | ((u64)val2 << 32);
+
+	return debugctlmsr;
+}
+
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
@@ -751,6 +762,18 @@ static inline void update_debugctlmsr(un
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+static inline void update_debugctlmsr_on_cpu(int cpu,
+					     unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+	if (boot_cpu_data.x86 < 6)
+		return;
+#endif
+	wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+		     (u32)((u64)debugctlmsr),
+		     (u32)((u64)debugctlmsr >> 32));
+}
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
@@ -761,6 +784,7 @@ extern unsigned int		BIOS_revision;
 
 /* Boot loader type from the setup header: */
 extern int			bootloader_type;
+extern int			bootloader_version;
 
 extern char			ignore_fpu_irq;
 
@@ -821,7 +845,6 @@ static inline void spin_lock_prefetch(co
 	.vm86_info		= NULL,					  \
 	.sysenter_cs		= __KERNEL_CS,				  \
 	.io_bitmap_ptr		= NULL,					  \
-	.fs			= __KERNEL_PERCPU,			  \
 }
 
 /*
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/smp.h	2009-11-20 11:15:20.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/smp.h	2009-11-20 11:17:59.000000000 +0100
@@ -195,7 +195,7 @@ extern unsigned disabled_cpus __cpuinitd
 static inline int logical_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+	return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
 }
 
 #endif
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/spinlock.h	2010-02-23 14:24:49.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/spinlock.h	2010-02-23 14:24:59.000000000 +0100
@@ -432,4 +432,8 @@ static inline void __raw_write_unlock(ra
 #define _raw_read_relax(lock)	cpu_relax()
 #define _raw_write_relax(lock)	cpu_relax()
 
+/* The {read|write|spin}_lock() on x86 are full memory barriers. */
+static inline void smp_mb__after_lock(void) { }
+#define ARCH_HAS_SMP_MB_AFTER_LOCK
+
 #endif /* _ASM_X86_SPINLOCK_H */
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/tlbflush.h	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/tlbflush.h	2009-11-06 10:52:09.000000000 +0100
@@ -111,6 +111,6 @@ static inline void flush_tlb_kernel_rang
 	flush_tlb_all();
 }
 
-extern void zap_low_mappings(void);
+extern void zap_low_mappings(bool early);
 
 #endif /* _ASM_X86_TLBFLUSH_H */
--- sle11sp1-2010-03-22.orig/arch/x86/include/mach-xen/asm/xor.h	2009-11-06 10:51:47.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/include/mach-xen/asm/xor.h	2009-11-06 10:52:09.000000000 +0100
@@ -1,4 +1,7 @@
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_KMEMCHECK
+/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
+# include <asm-generic/xor.h>
+#elif defined(CONFIG_X86_32)
 # include "../../asm/xor_32.h"
 #else
 # include "xor_64.h"
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/Makefile	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/Makefile	2009-11-06 10:52:09.000000000 +0100
@@ -137,6 +137,6 @@ ifeq ($(CONFIG_X86_64),y)
 endif
 
 disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \
-	i8259.o irqinit_$(BITS).o pci-swiotlb.o reboot.o smpboot.o tsc.o \
-	tsc_sync.o uv_%.o vsmp_64.o
+	i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o \
+	uv_%.o vsmp_64.o
 disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/acpi/sleep-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/acpi/sleep-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -107,7 +107,7 @@ int acpi_save_state_mem(void)
 	initial_gs = per_cpu_offset(smp_processor_id());
 #endif
 	initial_code = (unsigned long)wakeup_long64;
-	saved_magic = 0x123456789abcdef0;
+       saved_magic = 0x123456789abcdef0L;
 #endif /* CONFIG_64BIT */
 #endif
 
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/apic/io_apic-xen.c	2010-02-18 15:28:25.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/apic/io_apic-xen.c	2010-02-18 15:29:00.000000000 +0100
@@ -59,6 +59,7 @@
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
+#include <asm/hw_irq.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_irq.h>
 
@@ -145,12 +146,9 @@ struct irq_pin_list {
 	struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 {
 	struct irq_pin_list *pin;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
 
@@ -164,9 +162,6 @@ struct irq_cfg {
 	unsigned move_cleanup_count;
 	u8 vector;
 	u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-	u8 move_desc_pending : 1;
-#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -198,16 +193,18 @@ int __init arch_early_irq_init(void)
 	struct irq_cfg *cfg;
 	struct irq_desc *desc;
 	int count;
+	int node;
 	int i;
 
 	cfg = irq_cfgx;
 	count = ARRAY_SIZE(irq_cfgx);
+	node= cpu_to_node(boot_cpu_id);
 
 	for (i = 0; i < count; i++) {
 		desc = irq_to_desc(i);
 		desc->chip_data = &cfg[i];
-		alloc_bootmem_cpumask_var(&cfg[i].domain);
-		alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
+		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
 		if (i < NR_IRQS_LEGACY)
 			cpumask_setall(cfg[i].domain);
 	}
@@ -228,12 +225,9 @@ static struct irq_cfg *irq_cfg(unsigned 
 	return cfg;
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
 {
 	struct irq_cfg *cfg;
-	int node;
-
-	node = cpu_to_node(cpu);
 
 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
@@ -254,13 +248,13 @@ static struct irq_cfg *get_one_free_irq_
 	return cfg;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 
 	cfg = desc->chip_data;
 	if (!cfg) {
-		desc->chip_data = get_one_free_irq_cfg(cpu);
+		desc->chip_data = get_one_free_irq_cfg(node);
 		if (!desc->chip_data) {
 			printk(KERN_ERR "can not alloc irq_cfg\n");
 			BUG_ON(1);
@@ -270,10 +264,9 @@ int arch_init_chip_data(struct irq_desc 
 	return 0;
 }
 
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
 static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
 {
 	struct irq_pin_list *old_entry, *head, *tail, *entry;
 
@@ -282,7 +275,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_
 	if (!old_entry)
 		return;
 
-	entry = get_one_free_irq_2_pin(cpu);
+	entry = get_one_free_irq_2_pin(node);
 	if (!entry)
 		return;
 
@@ -292,7 +285,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_
 	tail		= entry;
 	old_entry	= old_entry->next;
 	while (old_entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			entry = head;
 			while (entry) {
@@ -332,12 +325,12 @@ static void free_irq_2_pin(struct irq_cf
 }
 
 void arch_init_copy_chip_data(struct irq_desc *old_desc,
-				 struct irq_desc *desc, int cpu)
+				 struct irq_desc *desc, int node)
 {
 	struct irq_cfg *cfg;
 	struct irq_cfg *old_cfg;
 
-	cfg = get_one_free_irq_cfg(cpu);
+	cfg = get_one_free_irq_cfg(node);
 
 	if (!cfg)
 		return;
@@ -348,7 +341,7 @@ void arch_init_copy_chip_data(struct irq
 
 	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
-	init_copy_irq_2_pin(old_cfg, cfg, cpu);
+	init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
 static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -372,19 +365,7 @@ void arch_free_chip_data(struct irq_desc
 		old_desc->chip_data = NULL;
 	}
 }
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg = desc->chip_data;
-
-	if (!cfg->move_in_progress) {
-		/* it means that domain is not changed */
-		if (!cpumask_intersects(desc->affinity, mask))
-			cfg->move_desc_pending = 1;
-	}
-}
-#endif
+/* end for move_irq_desc */
 
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -394,13 +375,6 @@ static struct irq_cfg *irq_cfg(unsigned 
 
 #endif
 
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -532,7 +506,8 @@ static struct IO_APIC_route_entry ioapic
 static void
 __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
-	union entry_union eu;
+	union entry_union eu = {{0, 0}};
+
 	eu.entry = e;
 	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
 	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -563,132 +538,18 @@ static void ioapic_mask_entry(int apic, 
 	spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-		cfg->move_cleanup_count = 0;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			cfg->move_cleanup_count++;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
-	cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-	u8 vector = cfg->vector;
-
-	entry = cfg->irq_2_pin;
-	for (;;) {
-		unsigned int reg;
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		/*
-		 * With interrupt-remapping, destination information comes
-		 * from interrupt-remapping table entry.
-		 */
-		if (!irq_remapped(irq))
-			io_apic_write(apic, 0x11 + pin*2, dest);
-		reg = io_apic_read(apic, 0x10 + pin*2);
-		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-		reg |= vector;
-		io_apic_modify(apic, 0x10 + pin*2, reg);
-		if (!entry->next)
-			break;
-		entry = entry->next;
-	}
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg;
-	unsigned int irq;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return BAD_APICID;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-	if (assign_irq_vector(irq, cfg, mask))
-		return BAD_APICID;
-
-	/* check that before desc->addinity get updated */
-	set_extra_move_desc(desc, mask);
-
-	cpumask_copy(desc->affinity, mask);
-
-	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
-}
-
-static void
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int dest;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	dest = set_desc_affinity(desc, mask);
-	if (dest != BAD_APICID) {
-		/* Only the high 8 bits are valid. */
-		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, cfg);
-	}
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-
-	set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
 	struct irq_pin_list *entry;
 
 	entry = cfg->irq_2_pin;
 	if (!entry) {
-		entry = get_one_free_irq_2_pin(cpu);
+		entry = get_one_free_irq_2_pin(node);
 		if (!entry) {
 			printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
 					apic, pin);
@@ -708,7 +569,7 @@ static void add_pin_to_irq_cpu(struct ir
 		entry = entry->next;
 	}
 
-	entry->next = get_one_free_irq_2_pin(cpu);
+	entry->next = get_one_free_irq_2_pin(node);
 	entry = entry->next;
 	entry->apic = apic;
 	entry->pin = pin;
@@ -717,7 +578,7 @@ static void add_pin_to_irq_cpu(struct ir
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
 				      int oldapic, int oldpin,
 				      int newapic, int newpin)
 {
@@ -737,7 +598,7 @@ static void __init replace_pin_at_irq_cp
 
 	/* why? call replace before add? */
 	if (!replaced)
-		add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+		add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -857,7 +718,7 @@ static void clear_IO_APIC (void)
 			clear_IO_APIC_pin(apic, pin);
 }
 #else
-#define add_pin_to_irq_cpu(cfg, cpu, apic, pin)
+#define add_pin_to_irq_node(cfg, node, apic, pin)
 #endif /* CONFIG_XEN */
 
 #ifdef CONFIG_X86_32
@@ -898,7 +759,7 @@ static int __init ioapic_pirq_setup(char
 __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_INTR_REMAP
+#ifndef CONFIG_XEN
 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
 {
 	int apic;
@@ -996,20 +857,6 @@ int restore_IO_APIC_setup(struct IO_APIC
 	return 0;
 }
 
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
-	struct IO_APIC_route_entry **ioapic_entries)
-
-{
-	/*
-	 * for now plain restore of previous settings.
-	 * TBD: In the case of OS enabling interrupt-remapping,
-	 * IO-APIC RTE's need to be setup to point to interrupt-remapping
-	 * table entries. for now, do a plain restore, and wait for
-	 * the setup_IO_APIC_irqs() to do proper initialization.
-	 */
-	restore_IO_APIC_setup(ioapic_entries);
-}
-
 void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 {
 	int apic;
@@ -1019,7 +866,7 @@ void free_ioapic_entries(struct IO_APIC_
 
 	kfree(ioapic_entries);
 }
-#endif
+#endif /* CONFIG_XEN */
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -1082,54 +929,6 @@ static int __init find_isa_irq_apic(int 
 }
 #endif
 
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-	int apic, i, best_guess = -1;
-
-	apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-		bus, slot, pin);
-	if (test_bit(bus, mp_bus_not_pci)) {
-		apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-		return -1;
-	}
-	for (i = 0; i < mp_irq_entries; i++) {
-		int lbus = mp_irqs[i].srcbus;
-
-		for (apic = 0; apic < nr_ioapics; apic++)
-			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
-			    mp_irqs[i].dstapic == MP_APIC_ALL)
-				break;
-
-		if (!test_bit(lbus, mp_bus_not_pci) &&
-		    !mp_irqs[i].irqtype &&
-		    (bus == lbus) &&
-		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
-			if (!(apic || IO_APIC_IRQ(irq)))
-				continue;
-
-			if (pin == (mp_irqs[i].srcbusirq & 3))
-				return irq;
-			/*
-			 * Use the first all-but-pin matching entry as a
-			 * best-guess fuzzy result for broken mptables.
-			 */
-			if (best_guess < 0)
-				best_guess = irq;
-		}
-	}
-	return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
@@ -1348,6 +1147,64 @@ static int pin_2_irq(int idx, int apic, 
 	return irq;
 }
 
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+				struct io_apic_irq_attr *irq_attr)
+{
+	int apic, i, best_guess = -1;
+
+	apic_printk(APIC_DEBUG,
+		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+		    bus, slot, pin);
+	if (test_bit(bus, mp_bus_not_pci)) {
+		apic_printk(APIC_VERBOSE,
+			    "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		return -1;
+	}
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].srcbus;
+
+		for (apic = 0; apic < nr_ioapics; apic++)
+			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+			    mp_irqs[i].dstapic == MP_APIC_ALL)
+				break;
+
+		if (!test_bit(lbus, mp_bus_not_pci) &&
+		    !mp_irqs[i].irqtype &&
+		    (bus == lbus) &&
+		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+			if (!(apic || IO_APIC_IRQ(irq)))
+				continue;
+
+			if (pin == (mp_irqs[i].srcbusirq & 3)) {
+				set_io_apic_irq_attr(irq_attr, apic,
+						     mp_irqs[i].dstirq,
+						     irq_trigger(i),
+						     irq_polarity(i));
+				return irq;
+			}
+			/*
+			 * Use the first all-but-pin matching entry as a
+			 * best-guess fuzzy result for broken mptables.
+			 */
+			if (best_guess < 0) {
+				set_io_apic_irq_attr(irq_attr, apic,
+						     mp_irqs[i].dstirq,
+						     irq_trigger(i),
+						     irq_polarity(i));
+				best_guess = irq;
+			}
+		}
+	}
+	return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
 #ifndef CONFIG_XEN
 void lock_vector_lock(void)
 {
@@ -1619,6 +1476,9 @@ int setup_ioapic_entry(int apic_id, int 
 		irte.vector = vector;
 		irte.dest_id = IRTE_DEST(destination);
 
+		/* Set source-id of interrupt request */
+		set_ioapic_sid(&irte, apic_id);
+
 		modify_irte(irq, &irte);
 
 		ir_entry->index2 = (index >> 15) & 0x1;
@@ -1693,63 +1553,75 @@ static void setup_IO_APIC_irq(int apic_i
 	ioapic_write_entry(apic_id, pin, entry);
 }
 
+static struct {
+	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
 static void __init setup_IO_APIC_irqs(void)
 {
-	int apic_id, pin, idx, irq;
+	int apic_id = 0, pin, idx, irq;
 	int notcon = 0;
 	struct irq_desc *desc;
 	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 
 	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-		for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		apic_id = mp_find_ioapic(0);
+		if (apic_id < 0)
+			apic_id = 0;
+	}
+#endif
 
-			idx = find_irq_entry(apic_id, pin, mp_INT);
-			if (idx == -1) {
-				if (!notcon) {
-					notcon = 1;
-					apic_printk(APIC_VERBOSE,
-						KERN_DEBUG " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				} else
-					apic_printk(APIC_VERBOSE, " %d-%d",
-						mp_ioapics[apic_id].apicid, pin);
-				continue;
-			}
-			if (notcon) {
+	for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+		idx = find_irq_entry(apic_id, pin, mp_INT);
+		if (idx == -1) {
+			if (!notcon) {
+				notcon = 1;
 				apic_printk(APIC_VERBOSE,
-					" (apicid-pin) not connected\n");
-				notcon = 0;
-			}
+					KERN_DEBUG " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			} else
+				apic_printk(APIC_VERBOSE, " %d-%d",
+					mp_ioapics[apic_id].apicid, pin);
+			continue;
+		}
+		if (notcon) {
+			apic_printk(APIC_VERBOSE,
+				" (apicid-pin) not connected\n");
+			notcon = 0;
+		}
 
-			irq = pin_2_irq(idx, apic_id, pin);
+		irq = pin_2_irq(idx, apic_id, pin);
 
 #ifdef CONFIG_XEN
-			if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
-				continue;
+		if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
+			continue;
 #else
-			/*
-			 * Skip the timer IRQ if there's a quirk handler
-			 * installed and if it returns 1:
-			 */
-			if (apic->multi_timer_check &&
-					apic->multi_timer_check(apic_id, irq))
-				continue;
+		/*
+		 * Skip the timer IRQ if there's a quirk handler
+		 * installed and if it returns 1:
+		 */
+		if (apic->multi_timer_check &&
+				apic->multi_timer_check(apic_id, irq))
+			continue;
 #endif
 
-			desc = irq_to_desc_alloc_cpu(irq, cpu);
-			if (!desc) {
-				printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-				continue;
-			}
-			cfg = desc->chip_data;
-			add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
-
-			setup_IO_APIC_irq(apic_id, pin, irq, desc,
-					irq_trigger(idx), irq_polarity(idx));
+		desc = irq_to_desc_alloc_node(irq, node);
+		if (!desc) {
+			printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+			continue;
 		}
+		cfg = desc->chip_data;
+		add_pin_to_irq_node(cfg, node, apic_id, pin);
+		/*
+		 * don't mark it in pin_programmed, so later acpi could
+		 * set it correctly when irq < 16
+		 */
+		setup_IO_APIC_irq(apic_id, pin, irq, desc,
+				irq_trigger(idx), irq_polarity(idx));
 	}
 
 	if (notcon)
@@ -1917,36 +1789,30 @@ __apicdebuginit(void) print_IO_APIC(void
 	return;
 }
 
-__apicdebuginit(void) print_APIC_bitfield(int base)
+__apicdebuginit(void) print_APIC_field(int base)
 {
-	unsigned int v;
-	int i, j;
+	int i;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
 
-	printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
-	for (i = 0; i < 8; i++) {
-		v = apic_read(base + i*0x10);
-		for (j = 0; j < 32; j++) {
-			if (v & (1<<j))
-				printk("1");
-			else
-				printk("0");
-		}
-		printk("\n");
-	}
+	printk(KERN_DEBUG);
+
+	for (i = 0; i < 8; i++)
+		printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+
+	printk(KERN_CONT "\n");
 }
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
-	unsigned int v, ver, maxlvt;
+	unsigned int i, v, ver, maxlvt;
 	u64 icr;
 
 	if (apic_verbosity == APIC_QUIET)
 		return;
 
-	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
 	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
@@ -1987,11 +1853,11 @@ __apicdebuginit(void) print_local_APIC(v
 	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
 
 	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_bitfield(APIC_ISR);
+	print_APIC_field(APIC_ISR);
 	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_bitfield(APIC_TMR);
+	print_APIC_field(APIC_TMR);
 	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_bitfield(APIC_IRR);
+	print_APIC_field(APIC_IRR);
 
 	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
 		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
@@ -2028,6 +1894,18 @@ __apicdebuginit(void) print_local_APIC(v
 	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
 	v = apic_read(APIC_TDCR);
 	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		v = apic_read(APIC_EFEAT);
+		maxlvt = (v >> 16) & 0xff;
+		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+		v = apic_read(APIC_ECTRL);
+		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+		for (i = 0; i < maxlvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+		}
+	}
 	printk("\n");
 }
 
@@ -2076,6 +1954,11 @@ __apicdebuginit(void) print_PIC(void)
 __apicdebuginit(int) print_all_ICs(void)
 {
 	print_PIC();
+
+	/* don't print out if apic is not there */
+	if (!cpu_has_apic || disable_apic)
+		return 0;
+
 	print_all_local_APICs();
 	print_IO_APIC();
 
@@ -2199,7 +2082,9 @@ void disable_IO_APIC(void)
 	/*
 	 * Use virtual wire A mode when interrupt remapping is enabled.
 	 */
-	disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
+	if (cpu_has_apic)
+		disconnect_bsp_APIC(!intr_remapping_enabled &&
+				ioapic_i8259.pin != -1);
 }
 
 #ifdef CONFIG_X86_32
@@ -2438,7 +2323,119 @@ static int ioapic_retrigger_irq(unsigned
  * races.
  */
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+		cfg->move_cleanup_count = 0;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			cfg->move_cleanup_count++;
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+		unsigned int reg;
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		/*
+		 * With interrupt-remapping, destination information comes
+		 * from interrupt-remapping table entry.
+		 */
+		if (!irq_remapped(irq))
+			io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin*2, reg);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+}
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * Either sets desc->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned int irq;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return BAD_APICID;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+	if (assign_irq_vector(irq, cfg, mask))
+		return BAD_APICID;
+
+	cpumask_copy(desc->affinity, mask);
+
+	return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+}
+
+static int
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int dest;
+	unsigned int irq;
+	int ret = -1;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	dest = set_desc_affinity(desc, mask);
+	if (dest != BAD_APICID) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, cfg);
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return ret;
+}
+
+static int
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+	struct irq_desc *desc;
+
+	desc = irq_to_desc(irq);
+
+	return set_ioapic_affinity_irq_desc(desc, mask);
+}
 
 #ifdef CONFIG_INTR_REMAP
 
@@ -2453,26 +2450,25 @@ static int ioapic_retrigger_irq(unsigned
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
  */
-static void
+static int
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
 	struct irq_cfg *cfg;
 	struct irte irte;
 	unsigned int dest;
 	unsigned int irq;
+	int ret = -1;
 
 	if (!cpumask_intersects(mask, cpu_online_mask))
-		return;
+		return ret;
 
 	irq = desc->irq;
 	if (get_irte(irq, &irte))
-		return;
+		return ret;
 
 	cfg = desc->chip_data;
 	if (assign_irq_vector(irq, cfg, mask))
-		return;
-
-	set_extra_move_desc(desc, mask);
+		return ret;
 
 	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
@@ -2488,27 +2484,30 @@ migrate_ioapic_irq_desc(struct irq_desc 
 		send_cleanup_vector(cfg);
 
 	cpumask_copy(desc->affinity, mask);
+
+	return 0;
 }
 
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 					    const struct cpumask *mask)
 {
-	migrate_ioapic_irq_desc(desc, mask);
+	return migrate_ioapic_irq_desc(desc, mask);
 }
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
 				       const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
-	set_ir_ioapic_affinity_irq_desc(desc, mask);
+	return set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
 						   const struct cpumask *mask)
 {
+	return 0;
 }
 #endif
 
@@ -2570,86 +2569,19 @@ static void irq_complete_move(struct irq
 	struct irq_cfg *cfg = desc->chip_data;
 	unsigned vector, me;
 
-	if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		if (likely(!cfg->move_desc_pending))
-			return;
-
-		/* domain has not changed, but affinity did */
-		me = smp_processor_id();
-		if (cpumask_test_cpu(me, desc->affinity)) {
-			*descp = desc = move_irq_desc(desc, me);
-			/* get the new one */
-			cfg = desc->chip_data;
-			cfg->move_desc_pending = 0;
-		}
-#endif
+	if (likely(!cfg->move_in_progress))
 		return;
-	}
 
 	vector = ~get_irq_regs()->orig_ax;
 	me = smp_processor_id();
 
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-		*descp = desc = move_irq_desc(desc, me);
-		/* get the new one */
-		cfg = desc->chip_data;
-#endif
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
 		send_cleanup_vector(cfg);
-	}
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
 
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-	int apic, pin;
-	struct irq_pin_list *entry;
-
-	entry = cfg->irq_2_pin;
-	for (;;) {
-
-		if (!entry)
-			break;
-
-		apic = entry->apic;
-		pin = entry->pin;
-		io_apic_eoi(apic, pin);
-		entry = entry->next;
-	}
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	unsigned int irq;
-
-	irq = desc->irq;
-	cfg = desc->chip_data;
-
-	spin_lock_irqsave(&ioapic_lock, flags);
-	__eoi_ioapic_irq(irq, cfg);
-	spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	ack_x2APIC_irq();
-	eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-	ack_x2APIC_irq();
-}
-#endif
-
 static void ack_apic_edge(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -2713,9 +2645,6 @@ static void ack_apic_level(unsigned int 
 	 */
 	ack_APIC_irq();
 
-	if (irq_remapped(irq))
-		eoi_ioapic_irq(desc);
-
 	/* Now we can move and renable the irq */
 	if (unlikely(do_unmask_irq)) {
 		/* Only migrate the irq if the ack has been received.
@@ -2762,22 +2691,50 @@ static void ack_apic_level(unsigned int 
 }
 
 #ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		io_apic_eoi(apic, pin);
+		entry = entry->next;
+	}
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__eoi_ioapic_irq(irq, cfg);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void ir_ack_apic_edge(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_edge(irq);
-#endif
-       return ack_apic_edge(irq);
+	ack_APIC_irq();
 }
 
 static void ir_ack_apic_level(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_level(irq);
-#endif
-       return ack_apic_level(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+
+	ack_APIC_irq();
+	eoi_ioapic_irq(desc);
 }
 #endif /* CONFIG_INTR_REMAP */
 
@@ -2988,7 +2945,7 @@ static inline void __init check_timer(vo
 {
 	struct irq_desc *desc = irq_to_desc(0);
 	struct irq_cfg *cfg = desc->chip_data;
-	int cpu = boot_cpu_id;
+	int node = cpu_to_node(boot_cpu_id);
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
 	int no_pin1 = 0;
@@ -3054,7 +3011,7 @@ static inline void __init check_timer(vo
 		 * Ok, does IRQ0 through the IOAPIC work?
 		 */
 		if (no_pin1) {
-			add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+			add_pin_to_irq_node(cfg, node, apic1, pin1);
 			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
 		} else {
 			/* for edge trigger, setup_IO_APIC_irq already
@@ -3091,7 +3048,7 @@ static inline void __init check_timer(vo
 		/*
 		 * legacy devices should be connected to IO APIC #0
 		 */
-		replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
 		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
 		enable_8259A_irq(0);
 		if (timer_irq_works()) {
@@ -3321,14 +3278,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
 {
 	/* Allocate an unused irq */
 	unsigned int irq;
 	unsigned int new;
 	unsigned long flags;
 	struct irq_cfg *cfg_new = NULL;
-	int cpu = boot_cpu_id;
 	struct irq_desc *desc_new = NULL;
 
 	irq = 0;
@@ -3337,7 +3293,7 @@ unsigned int create_irq_nr(unsigned int 
 
 	spin_lock_irqsave(&vector_lock, flags);
 	for (new = irq_want; new < nr_irqs; new++) {
-		desc_new = irq_to_desc_alloc_cpu(new, cpu);
+		desc_new = irq_to_desc_alloc_node(new, node);
 		if (!desc_new) {
 			printk(KERN_INFO "can not get irq_desc for %d\n", new);
 			continue;
@@ -3346,6 +3302,9 @@ unsigned int create_irq_nr(unsigned int 
 
 		if (cfg_new->vector != 0)
 			continue;
+
+		desc_new = move_irq_desc(desc_new, node);
+
 		if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
 			irq = new;
 		break;
@@ -3363,11 +3322,12 @@ unsigned int create_irq_nr(unsigned int 
 
 int create_irq(void)
 {
+	int node = cpu_to_node(boot_cpu_id);
 	unsigned int irq_want;
 	int irq;
 
 	irq_want = nr_irqs_gsi;
-	irq = create_irq_nr(irq_want);
+	irq = create_irq_nr(irq_want, node);
 
 	if (irq == 0)
 		irq = -1;
@@ -3433,6 +3393,9 @@ static int msi_compose_msg(struct pci_de
 		irte.vector = cfg->vector;
 		irte.dest_id = IRTE_DEST(dest);
 
+		/* Set source-id of interrupt request */
+		set_msi_sid(&irte, pdev);
+
 		modify_irte(irq, &irte);
 
 		msg->address_hi = MSI_ADDR_BASE_HI;
@@ -3470,7 +3433,7 @@ static int msi_compose_msg(struct pci_de
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3479,7 +3442,7 @@ static void set_msi_irq_affinity(unsigne
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3491,13 +3454,15 @@ static void set_msi_irq_affinity(unsigne
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	write_msi_msg_desc(desc, &msg);
+
+	return 0;
 }
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void
+static int
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -3506,11 +3471,11 @@ ir_set_msi_irq_affinity(unsigned int irq
 	struct irte irte;
 
 	if (get_irte(irq, &irte))
-		return;
+		return -1;
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	irte.vector = cfg->vector;
 	irte.dest_id = IRTE_DEST(dest);
@@ -3527,6 +3492,8 @@ ir_set_msi_irq_affinity(unsigned int irq
 	 */
 	if (cfg->move_in_progress)
 		send_cleanup_vector(cfg);
+
+	return 0;
 }
 
 #endif
@@ -3622,15 +3589,17 @@ int arch_setup_msi_irqs(struct pci_dev *
 	unsigned int irq_want;
 	struct intel_iommu *iommu = NULL;
 	int index = 0;
+	int node;
 
 	/* x86 doesn't support multiple MSI yet */
 	if (type == PCI_CAP_ID_MSI && nvec > 1)
 		return 1;
 
+	node = dev_to_node(&dev->dev);
 	irq_want = nr_irqs_gsi;
 	sub_handle = 0;
 	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = create_irq_nr(irq_want);
+		irq = create_irq_nr(irq_want, node);
 		if (irq == 0)
 			return -1;
 		irq_want = irq + 1;
@@ -3680,7 +3649,7 @@ void arch_teardown_msi_irq(unsigned int 
 
 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3689,7 +3658,7 @@ static void dmar_msi_set_affinity(unsign
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3701,11 +3670,13 @@ static void dmar_msi_set_affinity(unsign
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
 
-struct irq_chip dmar_msi_type = {
+static struct irq_chip dmar_msi_type = {
 	.name = "DMAR_MSI",
 	.unmask = dmar_msi_unmask,
 	.mask = dmar_msi_mask,
@@ -3734,7 +3705,7 @@ int arch_setup_dmar_msi(unsigned int irq
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3743,7 +3714,7 @@ static void hpet_msi_set_affinity(unsign
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
@@ -3755,6 +3726,8 @@ static void hpet_msi_set_affinity(unsign
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
 	hpet_msi_write(irq, &msg);
+
+	return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3811,7 +3784,7 @@ static void target_ht_irq(unsigned int i
 	write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irq_cfg *cfg;
@@ -3819,11 +3792,13 @@ static void set_ht_irq_affinity(unsigned
 
 	dest = set_desc_affinity(desc, mask);
 	if (dest == BAD_APICID)
-		return;
+		return -1;
 
 	cfg = desc->chip_data;
 
 	target_ht_irq(irq, dest, cfg->vector);
+
+	return 0;
 }
 
 #endif
@@ -3898,6 +3873,8 @@ int arch_enable_uv_irq(char *irq_name, u
 	unsigned long flags;
 	int err;
 
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
 	cfg = irq_cfg(irq);
 
 	err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3911,19 +3888,20 @@ int arch_enable_uv_irq(char *irq_name, u
 
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-	entry->vector = cfg->vector;
-	entry->delivery_mode = apic->irq_delivery_mode;
-	entry->dest_mode = apic->irq_dest_mode;
-	entry->polarity = 0;
-	entry->trigger = 0;
-	entry->mask = 0;
-	entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+	entry->vector		= cfg->vector;
+	entry->delivery_mode	= apic->irq_delivery_mode;
+	entry->dest_mode	= apic->irq_dest_mode;
+	entry->polarity		= 0;
+	entry->trigger		= 0;
+	entry->mask		= 0;
+	entry->dest		= apic->cpu_mask_to_apicid(eligible_cpu);
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
 	uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
 
+	if (cfg->move_in_progress)
+		send_cleanup_vector(cfg);
+
 	return irq;
 }
 
@@ -3937,10 +3915,10 @@ void arch_disable_uv_irq(int mmr_blade, 
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode;
 
+	BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
 	mmr_value = 0;
 	entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-	BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
 	entry->mask = 1;
 
 	mmr_pnode = uv_blade_to_pnode(mmr_blade);
@@ -4006,6 +3984,78 @@ int __init arch_probe_nr_irqs(void)
 #endif
 #endif /* CONFIG_XEN */
 
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+				struct io_apic_irq_attr *irq_attr)
+{
+	struct irq_desc *desc;
+	struct irq_cfg *cfg;
+	int node;
+	int ioapic, pin;
+	int trigger, polarity;
+
+	ioapic = irq_attr->ioapic;
+#ifdef CONFIG_XEN
+	if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) {
+		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
+			    ioapic, irq);
+		return -EINVAL;
+	}
+#endif
+	if (!IO_APIC_IRQ(irq)) {
+		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+			ioapic);
+		return -EINVAL;
+	}
+
+	if (dev)
+		node = dev_to_node(dev);
+	else
+		node = cpu_to_node(boot_cpu_id);
+
+	desc = irq_to_desc_alloc_node(irq, node);
+	if (!desc) {
+		printk(KERN_INFO "can not get irq_desc %d\n", irq);
+		return 0;
+	}
+
+	pin = irq_attr->ioapic_pin;
+	trigger = irq_attr->trigger;
+	polarity = irq_attr->polarity;
+
+	/*
+	 * IRQs < 16 are already in the irq_2_pin[] map
+	 */
+	if (irq >= NR_IRQS_LEGACY) {
+		cfg = desc->chip_data;
+		add_pin_to_irq_node(cfg, node, ioapic, pin);
+	}
+
+	setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+
+	return 0;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+				struct io_apic_irq_attr *irq_attr)
+{
+	int ioapic, pin;
+	/*
+	 * Avoid pin reprogramming.  PRTs typically include entries
+	 * with redundant pin->gsi mappings (but unique PCI devices);
+	 * we only program the IOAPIC on the first.
+	 */
+	ioapic = irq_attr->ioapic;
+	pin = irq_attr->ioapic_pin;
+	if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+		pr_debug("Pin %d-%d already programmed\n",
+			 mp_ioapics[ioapic].apicid, pin);
+		return 0;
+	}
+	set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+	return __io_apic_set_pci_routing(dev, irq, irq_attr);
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -4088,6 +4138,7 @@ int __init io_apic_get_unique_id(int ioa
 
 	return apic_id;
 }
+#endif
 
 int __init io_apic_get_version(int ioapic)
 {
@@ -4100,47 +4151,6 @@ int __init io_apic_get_version(int ioapi
 
 	return reg_01.bits.version;
 }
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-	struct irq_desc *desc;
-	struct irq_cfg *cfg;
-	int cpu = boot_cpu_id;
-
-#ifdef CONFIG_XEN
-	if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) {
-		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
-			    ioapic, irq);
-		return -EINVAL;
-	}
-#endif
-
-	if (!IO_APIC_IRQ(irq)) {
-		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-			ioapic);
-		return -EINVAL;
-	}
-
-	desc = irq_to_desc_alloc_cpu(irq, cpu);
-	if (!desc) {
-		printk(KERN_INFO "can not get irq_desc %d\n", irq);
-		return 0;
-	}
-
-	/*
-	 * IRQs < 16 are already in the irq_2_pin[] map
-	 */
-	if (irq >= NR_IRQS_LEGACY) {
-		cfg = desc->chip_data;
-		add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
-	}
-
-	setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
-	return 0;
-}
-
 
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
@@ -4172,51 +4182,44 @@ int acpi_get_override_irq(int bus_irq, i
 #ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
-	int pin, ioapic, irq, irq_entry;
+	int pin, ioapic = 0, irq, irq_entry;
 	struct irq_desc *desc;
-	struct irq_cfg *cfg;
 	const struct cpumask *mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
 
-	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-			if (irq_entry == -1)
-				continue;
-			irq = pin_2_irq(irq_entry, ioapic, pin);
-
-			/* setup_IO_APIC_irqs could fail to get vector for some device
-			 * when you have too many devices, because at that time only boot
-			 * cpu is online.
-			 */
-			desc = irq_to_desc(irq);
-			cfg = desc->chip_data;
-			if (!cfg->vector) {
-				setup_IO_APIC_irq(ioapic, pin, irq, desc,
-						  irq_trigger(irq_entry),
-						  irq_polarity(irq_entry));
-				continue;
+#ifdef CONFIG_ACPI
+	if (!acpi_disabled && acpi_ioapic) {
+		ioapic = mp_find_ioapic(0);
+		if (ioapic < 0)
+			ioapic = 0;
+	}
+#endif
 
-			}
+	for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+		irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+		if (irq_entry == -1)
+			continue;
+		irq = pin_2_irq(irq_entry, ioapic, pin);
 
-			/*
-			 * Honour affinities which have been set in early boot
-			 */
-			if (desc->status &
-			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-				mask = desc->affinity;
-			else
-				mask = apic->target_cpus();
+		desc = irq_to_desc(irq);
 
-			if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq_desc(desc, mask);
-			else
-				set_ioapic_affinity_irq_desc(desc, mask);
-		}
+		/*
+		 * Honour affinities which have been set in early boot
+		 */
+		if (desc->status &
+		    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+			mask = desc->affinity;
+		else
+			mask = apic->target_cpus();
 
+		if (intr_remapping_enabled)
+			set_ir_ioapic_affinity_irq_desc(desc, mask);
+		else
+			set_ioapic_affinity_irq_desc(desc, mask);
 	}
+
 }
 #endif
 
@@ -4299,29 +4302,21 @@ fake_ioapic_page:
 	}
 }
 
-static int __init ioapic_insert_resources(void)
+void __init ioapic_insert_resources(void)
 {
 	int i;
 	struct resource *r = ioapic_resources;
 
 	if (!r) {
-		if (nr_ioapics > 0) {
+		if (nr_ioapics > 0)
 			printk(KERN_ERR
 				"IO APIC resources couldn't be allocated.\n");
-			return -1;
-		}
-		return 0;
+		return;
 	}
 
 	for (i = 0; i < nr_ioapics; i++) {
 		insert_resource(&iomem_resource, r);
 		r++;
 	}
-
-	return 0;
 }
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
 #endif /* !CONFIG_XEN */
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/apic/probe_32-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/apic/probe_32-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -20,23 +20,12 @@
 #include <asm/apic.h>
 #include <asm/setup.h>
 
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
 #include <linux/smp.h>
-#include <linux/init.h>
 #include <asm/ipi.h>
 
-#include <linux/smp.h>
-#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <asm/acpi.h>
 #include <asm/e820.h>
-#include <asm/setup.h>
 
 static int xen_phys_pkg_id(int cpuid_apic, int index_msb)
 {
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/cpu/amd.c	2010-01-18 16:31:12.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/cpu/amd.c	2010-01-18 16:53:52.000000000 +0100
@@ -426,7 +426,7 @@ static void __cpuinit early_init_amd(str
 		    (c->x86_model == 8 && c->x86_mask >= 8))
 			set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 #endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) && !defined(CONFIG_XEN)
 	/* check CPU config space for extended APIC ID */
 	if (cpu_has_apic && c->x86 >= 0xf) {
 		unsigned int val;
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/cpu/common-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/cpu/common-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -13,6 +13,7 @@
 #include <linux/io.h>
 
 #include <asm/stackprotector.h>
+#include <asm/perf_counter.h>
 #include <asm/mmu_context.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -66,7 +67,30 @@ void __init setup_cpu_local_masks(void)
 	alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
 }
 
-static const struct cpu_dev *this_cpu __cpuinitdata;
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
+	display_cacheinfo(c);
+#else
+	/* Not much we can do here... */
+	/* Check if at least it has cpuid */
+	if (c->cpuid_level == -1) {
+		/* No cpuid. It must be an ancient CPU */
+		if (c->x86 == 4)
+			strcpy(c->x86_model_id, "486");
+		else if (c->x86 == 3)
+			strcpy(c->x86_model_id, "386");
+	}
+#endif
+}
+
+static const struct cpu_dev __cpuinitconst default_cpu = {
+	.c_init		= default_init,
+	.c_vendor	= "Unknown",
+	.c_x86_vendor	= X86_VENDOR_UNKNOWN,
+};
+
+static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
 
 DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 #ifdef CONFIG_X86_64
@@ -116,7 +140,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
 	/* data */
 	[GDT_ENTRY_APMBIOS_BASE+2]	= { { { 0x0000ffff, 0x00409200 } } },
 
-	[GDT_ENTRY_ESPFIX_SS]		= { { { 0x00000000, 0x00c09200 } } },
+	[GDT_ENTRY_ESPFIX_SS]		= { { { 0x0000ffff, 0x00cf9200 } } },
 #endif
 	[GDT_ENTRY_PERCPU]		= { { { 0x0000ffff, 0x00cf9200 } } },
 	GDT_STACK_CANARY_INIT
@@ -309,7 +333,8 @@ static const char *__cpuinit table_looku
 	return NULL;		/* Not found */
 }
 
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
 
 void load_percpu_segment(int cpu)
 {
@@ -358,29 +383,6 @@ void switch_to_new_gdt(int cpu)
 
 static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
 
-static void __cpuinit default_init(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_X86_64
-	display_cacheinfo(c);
-#else
-	/* Not much we can do here... */
-	/* Check if at least it has cpuid */
-	if (c->cpuid_level == -1) {
-		/* No cpuid. It must be an ancient CPU */
-		if (c->x86 == 4)
-			strcpy(c->x86_model_id, "486");
-		else if (c->x86 == 3)
-			strcpy(c->x86_model_id, "386");
-	}
-#endif
-}
-
-static const struct cpu_dev __cpuinitconst default_cpu = {
-	.c_init	= default_init,
-	.c_vendor = "Unknown",
-	.c_x86_vendor = X86_VENDOR_UNKNOWN,
-};
-
 static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
 {
 	unsigned int *v;
@@ -513,7 +515,6 @@ out:
 static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
 {
 	char *v = c->x86_vendor_id;
-	static int printed;
 	int i;
 
 	for (i = 0; i < X86_VENDOR_NUM; i++) {
@@ -530,13 +531,9 @@ static void __cpuinit get_cpu_vendor(str
 		}
 	}
 
-	if (!printed) {
-		printed++;
-		printk(KERN_ERR
-		    "CPU: vendor_id '%s' unknown, using generic init.\n", v);
-
-		printk(KERN_ERR "CPU: Your system may be unstable.\n");
-	}
+	printk_once(KERN_ERR
+			"CPU: vendor_id '%s' unknown, using generic init.\n" \
+			"CPU: Your system may be unstable.\n", v);
 
 	c->x86_vendor = X86_VENDOR_UNKNOWN;
 	this_cpu = &default_cpu;
@@ -798,6 +795,12 @@ static void __cpuinit identify_cpu(struc
 	if (this_cpu->c_identify)
 		this_cpu->c_identify(c);
 
+	/* Clear/Set all flags overriden by options, after probe */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 #if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
 	c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 #endif
@@ -843,6 +846,16 @@ static void __cpuinit identify_cpu(struc
 #endif
 
 	init_hypervisor(c);
+
+	/*
+	 * Clear/Set all flags overriden by options, need do it
+	 * before following smp all cpus cap AND.
+	 */
+	for (i = 0; i < NCAPINTS; i++) {
+		c->x86_capability[i] &= ~cpu_caps_cleared[i];
+		c->x86_capability[i] |= cpu_caps_set[i];
+	}
+
 	/*
 	 * On SMP, boot_cpu_data holds the common feature set between
 	 * all CPUs; so make sure that we indicate which features are
@@ -855,10 +868,6 @@ static void __cpuinit identify_cpu(struc
 			boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
 	}
 
-	/* Clear all flags overriden by options */
-	for (i = 0; i < NCAPINTS; i++)
-		c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
 #ifdef CONFIG_X86_MCE
 	/* Init Machine Check Exception if available. */
 	mcheck_init(c);
@@ -891,6 +900,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
+	init_hw_perf_counters();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/cpu/mcheck/Makefile	2010-01-27 14:29:48.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/cpu/mcheck/Makefile	2010-01-27 14:47:15.000000000 +0100
@@ -9,5 +9,3 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += thres
 obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
 
 obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
-
-disabled-obj-$(CONFIG_XEN)	:= therm_throt.o
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/cpu/mcheck/mce.c	2010-01-27 14:28:39.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/cpu/mcheck/mce.c	2010-01-27 14:47:19.000000000 +0100
@@ -454,7 +454,9 @@ static inline void mce_get_rip(struct mc
  */
 asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
 {
+#ifndef CONFIG_XEN
 	ack_APIC_irq();
+#endif
 	exit_idle();
 	irq_enter();
 	mce_notify_irq();
@@ -477,7 +479,7 @@ static void mce_report_event(struct pt_r
 		return;
 	}
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
 	/*
 	 * Without APIC do not notify. The event will be picked
 	 * up eventually.
@@ -2078,7 +2080,7 @@ static __init int mce_init_device(void)
 #ifdef CONFIG_X86_XEN_MCE
 	if (is_initial_xendomain()) {
 		/* Register vIRQ handler for MCE LOG processing */
-		extern void bind_virq_for_mce(void);
+		extern int bind_virq_for_mce(void);
 
 		printk(KERN_DEBUG "MCE: bind virq for DOM0 logging\n");
 		bind_virq_for_mce();
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/cpu/mcheck/mce_dom0.c	2009-11-06 10:45:48.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/cpu/mcheck/mce_dom0.c	2009-11-06 10:52:09.000000000 +0100
@@ -7,12 +7,17 @@
 #include <asm/hypercall.h>
 #include <asm/mce.h>
 
+static xen_mc_logical_cpu_t *g_physinfo;
+static unsigned int ncpus;
+
 static int convert_log(struct mc_info *mi)
 {
 	struct mcinfo_common *mic = NULL;
 	struct mcinfo_global *mc_global;
 	struct mcinfo_bank *mc_bank;
 	struct mce m;
+	unsigned int i;
+	bool found = false;
 
 	x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
 	if (mic == NULL)
@@ -21,9 +26,21 @@ static int convert_log(struct mc_info *m
 		return -1;
 	}
 
+	mce_setup(&m);
 	mc_global = (struct mcinfo_global*)mic;
 	m.mcgstatus = mc_global->mc_gstatus;
-	m.cpu = mc_global->mc_coreid;/*for test*/
+	m.apicid = mc_global->mc_apicid;
+
+	for (i = 0; i < ncpus; i++)
+		if (g_physinfo[i].mc_apicid == m.apicid) {
+			found = true;
+			break;
+		}
+	WARN_ON_ONCE(!found);
+	m.socketid = g_physinfo[i].mc_chipid;
+	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+
 	x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK);
 	do
 	{
@@ -36,7 +53,6 @@ static int convert_log(struct mc_info *m
 			m.status = mc_bank->mc_status;
 			m.addr = mc_bank->mc_addr;
 			m.tsc = mc_bank->mc_tsc;
-			m.res1 = mc_bank->mc_ctrl2;
 			m.bank = mc_bank->mc_bank;
 			printk(KERN_DEBUG "[CPU%d, BANK%d, addr %llx, state %llx]\n", 
 						m.bank, m.cpu, m.addr, m.status);
@@ -116,18 +132,55 @@ end:
 	return IRQ_HANDLED;
 }
 
-void bind_virq_for_mce(void)
+int __init bind_virq_for_mce(void)
 {
 	int ret;
+	xen_mc_t mc_op;
+
+	g_mi = kmalloc(sizeof(*g_mi), GFP_KERNEL);
+	if (!g_mi)
+		return -ENOMEM;
+
+	/* fetch physical CPU count */
+	mc_op.cmd = XEN_MC_physcpuinfo;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, NULL);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE: Failed to get physical CPU count\n");
+		kfree(g_mi);
+		return ret;
+	}
+
+	/* fetch CPU physical info for later reference */
+	ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+	g_physinfo = kmalloc(sizeof(*g_physinfo) * ncpus, GFP_KERNEL);
+	if (!g_physinfo) {
+		kfree(g_mi);
+		return -ENOMEM;
+	}
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE: Failed to get physical CPUs' info\n");
+		kfree(g_mi);
+		kfree(g_physinfo);
+		return ret;
+	}
 
 	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0, 
 		mce_dom0_interrupt, 0, "mce", NULL);
 
-	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
-	if (ret < 0)
-		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+	if (ret < 0) {
+		printk(KERN_ERR "MCE: Failed to bind vIRQ for Dom0\n");
+		kfree(g_mi);
+		kfree(g_physinfo);
+		return ret;
+	}
 
 	/* Log the machine checks left over from the previous reset. */
 	mce_dom0_interrupt(VIRQ_MCA, NULL);
+
+	return 0;
 }
 
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/dumpstack_64.c	2009-11-06 10:51:47.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/dumpstack_64.c	2009-11-06 10:52:09.000000000 +0100
@@ -36,7 +36,11 @@ static char x86_stack_ids[][8] = {
 
 int x86_is_stack_id(int id, char *name)
 {
+#ifndef CONFIG_X86_NO_TSS
 	return x86_stack_ids[id - 1] == name;
+#else
+	return 0;
+#endif
 }
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/e820-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/e820-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -659,7 +659,7 @@ __init int e820_search_gap(unsigned long
  */
 __init void e820_setup_gap(void)
 {
-	unsigned long gapstart, gapsize, round;
+	unsigned long gapstart, gapsize;
 	int found;
 
 	gapstart = 0x10000000;
@@ -668,24 +668,18 @@ __init void e820_setup_gap(void)
 
 #ifdef CONFIG_X86_64
 	if (!found) {
-		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
-		       "address range\n"
-		       KERN_ERR "PCI: Unassigned devices with 32bit resource "
-		       "registers may break!\n");
+		printk(KERN_ERR
+	"PCI: Warning: Cannot find a gap in the 32bit address range\n"
+	"PCI: Unassigned devices with 32bit resource registers may break!\n");
 		found = e820_search_gap(&gapstart, &gapsize, MAX_GAP_END, 0);
 		WARN_ON(!found);
 	}
 #endif
 
 	/*
-	 * See how much we want to round up: start off with
-	 * rounding to the next 1MB area.
+	 * e820_reserve_resources_late protect stolen RAM already
 	 */
-	round = 0x100000;
-	while ((gapsize >> 4) > round)
-		round += round;
-	/* Fun with two's complement */
-	pci_mem_start = (gapstart + round) & -round;
+	pci_mem_start = gapstart;
 
 	printk(KERN_INFO
 	       "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
@@ -1495,6 +1489,25 @@ void __init e820_reserve_resources(void)
 	}
 }
 
+/* How much should we pad RAM ending depending on where it is? */
+static unsigned long ram_alignment(resource_size_t pos)
+{
+	unsigned long mb = pos >> 20;
+
+	/* To 64kB in the first megabyte */
+	if (!mb)
+		return 64*1024;
+
+	/* To 1MB in the first 16MB */
+	if (mb < 16)
+		return 1024*1024;
+
+	/* To 32MB for anything above that */
+	return 32*1024*1024;
+}
+
+#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
+
 void __init e820_reserve_resources_late(void)
 {
 	int i;
@@ -1506,6 +1519,26 @@ void __init e820_reserve_resources_late(
 			insert_resource_expand_to_fit(&iomem_resource, res);
 		res++;
 	}
+
+	/*
+	 * Try to bump up RAM regions to reasonable boundaries to
+	 * avoid stolen RAM:
+	 */
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *entry = &e820.map[i];
+		u64 start, end;
+
+		if (entry->type != E820_RAM)
+			continue;
+		start = entry->addr + entry->size;
+		end = round_up(start, ram_alignment(start)) - 1;
+		if (end > MAX_RESOURCE_SIZE)
+			end = MAX_RESOURCE_SIZE;
+		if (start >= end)
+			continue;
+		reserve_region_with_split(&iomem_resource, start, end,
+					  "RAM buffer");
+	}
 }
 
 #undef e820
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/entry_32-xen.S	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/entry_32-xen.S	2009-11-06 10:52:09.000000000 +0100
@@ -48,7 +48,6 @@
 #include <asm/segment.h>
 #include <asm/smp.h>
 #include <asm/page_types.h>
-#include <asm/desc.h>
 #include <asm/percpu.h>
 #include <asm/dwarf2.h>
 #include <asm/processor-flags.h>
@@ -88,7 +87,7 @@ NMI_MASK	= 0x80000000
 #define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
 #else
 #define preempt_stop(clobbers)
-#define resume_kernel		restore_nocheck
+#define resume_kernel		restore_all
 #endif
 
 .macro TRACE_IRQS_IRET
@@ -376,7 +375,7 @@ END(ret_from_exception)
 ENTRY(resume_kernel)
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	cmpl $0,TI_preempt_count(%ebp)	# non-zero preempt_count ?
-	jnz restore_nocheck
+	jnz restore_all
 need_resched:
 	movl TI_flags(%ebp), %ecx	# need_resched set ?
 	testb $_TIF_NEED_RESCHED, %cl
@@ -569,6 +568,8 @@ syscall_exit:
 	jne syscall_exit_work
 
 restore_all:
+	TRACE_IRQS_IRET
+restore_all_notrace:
 #ifndef CONFIG_XEN
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS, SS and CS
 	# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -594,8 +595,6 @@ restore_nocheck:
 	CFI_REMEMBER_STATE
 	jnz restore_all_enable_events	#        != 0 => enable event delivery
 #endif
-	TRACE_IRQS_IRET
-restore_nocheck_notrace:
 	RESTORE_REGS 4			# skip orig_eax/error_code
 	CFI_ADJUST_CFA_OFFSET -4
 irq_return:
@@ -632,22 +631,34 @@ ldt_ss:
 	jne restore_nocheck
 #endif
 
-	/* If returning to userspace with 16bit stack,
-	 * try to fix the higher word of ESP, as the CPU
-	 * won't restore it.
-	 * This is an "official" bug of all the x86-compatible
-	 * CPUs, which we can try to work around to make
-	 * dosemu and wine happy. */
-	movl PT_OLDESP(%esp), %eax
-	movl %esp, %edx
-	call patch_espfix_desc
+/*
+ * Setup and switch to ESPFIX stack
+ *
+ * We're returning to userspace with a 16 bit stack. The CPU will not
+ * restore the high word of ESP for us on executing iret... This is an
+ * "official" bug of all the x86-compatible CPUs, which we can work
+ * around to make dosemu and wine happy. We do this by preloading the
+ * high word of ESP with the high word of the userspace ESP while
+ * compensating for the offset by changing to the ESPFIX segment with
+ * a base address that matches for the difference.
+ */
+	mov %esp, %edx			/* load kernel esp */
+	mov PT_OLDESP(%esp), %eax	/* load userspace esp */
+	mov %dx, %ax			/* eax: new kernel esp */
+	sub %eax, %edx			/* offset (low word is 0) */
+	PER_CPU(gdt_page, %ebx)
+	shr $16, %edx
+	mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
+	mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
 	pushl $__ESPFIX_SS
 	CFI_ADJUST_CFA_OFFSET 4
-	pushl %eax
+	push %eax			/* new kernel esp */
 	CFI_ADJUST_CFA_OFFSET 4
+	/* Disable interrupts, but do not irqtrace this section: we
+	 * will soon execute iret and the tracer was already set to
+	 * the irqstate after the iret */
 	DISABLE_INTERRUPTS(CLBR_EAX)
-	TRACE_IRQS_OFF
-	lss (%esp), %esp
+	lss (%esp), %esp		/* switch to espfix segment */
 	CFI_ADJUST_CFA_OFFSET -8
 	jmp restore_nocheck
 #else
@@ -786,15 +797,24 @@ PTREGSCALL(vm86old)
 
 #ifndef CONFIG_XEN
 .macro FIXUP_ESPFIX_STACK
-	/* since we are on a wrong stack, we cant make it a C code :( */
+/*
+ * Switch back for ESPFIX stack to the normal zerobased stack
+ *
+ * We can't call C functions using the ESPFIX stack. This code reads
+ * the high word of the segment base from the GDT and swiches to the
+ * normal stack and adjusts ESP with the matching offset.
+ */
+	/* fixup the stack */
 	PER_CPU(gdt_page, %ebx)
-	GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
-	addl %esp, %eax
+	mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
+	mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
+	shl $16, %eax
+	addl %esp, %eax			/* the adjusted stack pointer */
 	pushl $__KERNEL_DS
 	CFI_ADJUST_CFA_OFFSET 4
 	pushl %eax
 	CFI_ADJUST_CFA_OFFSET 4
-	lss (%esp), %esp
+	lss (%esp), %esp		/* switch to the normal stack segment */
 	CFI_ADJUST_CFA_OFFSET -8
 .endm
 .macro UNWIND_ESPFIX_STACK
@@ -1284,6 +1304,7 @@ ENTRY(ftrace_graph_caller)
 	pushl %edx
 	movl 0xc(%esp), %edx
 	lea 0x4(%ebp), %eax
+	movl (%ebp), %ecx
 	subl $MCOUNT_INSN_SIZE, %edx
 	call prepare_ftrace_return
 	popl %edx
@@ -1298,6 +1319,7 @@ return_to_handler:
 	pushl %eax
 	pushl %ecx
 	pushl %edx
+	movl %ebp, %eax
 	call ftrace_return_to_handler
 	movl %eax, 0xc(%esp)
 	popl %edx
@@ -1593,7 +1615,7 @@ nmi_stack_correct:
 	xorl %edx,%edx		# zero error code
 	movl %esp,%eax		# pt_regs pointer
 	call do_nmi
-	jmp restore_nocheck_notrace
+	jmp restore_all_notrace
 	CFI_ENDPROC
 
 nmi_stack_fixup:
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/entry_64.S	2009-11-06 10:51:42.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/entry_64.S	2009-11-06 10:52:09.000000000 +0100
@@ -1446,7 +1446,7 @@ END(kdb_call)
 paranoidzeroentry_ist debug do_debug DEBUG_STACK
 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
 paranoiderrorentry stack_segment do_stack_segment
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
 zeroentry xen_debug do_debug
 zeroentry xen_int3 do_int3
 errorentry xen_stack_segment do_stack_segment
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/entry_64-xen.S	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/entry_64-xen.S	2009-11-06 10:52:09.000000000 +0100
@@ -139,6 +139,7 @@ ENTRY(ftrace_graph_caller)
 
 	leaq 8(%rbp), %rdi
 	movq 0x38(%rsp), %rsi
+	movq (%rbp), %rdx
 	subq $MCOUNT_INSN_SIZE, %rsi
 
 	call	prepare_ftrace_return
@@ -151,27 +152,15 @@ END(ftrace_graph_caller)
 GLOBAL(return_to_handler)
 	subq  $80, %rsp
 
+	/* Save the return values */
 	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
-	movq %r10, 56(%rsp)
-	movq %r11, 64(%rsp)
+	movq %rdx, 8(%rsp)
+	movq %rbp, %rdi
 
 	call ftrace_return_to_handler
 
 	movq %rax, 72(%rsp)
-	movq 64(%rsp), %r11
-	movq 56(%rsp), %r10
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
+	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
 	addq $72, %rsp
 	retq
@@ -876,6 +865,8 @@ END(\sym)
 #ifdef CONFIG_SMP
 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
 	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+apicinterrupt REBOOT_VECTOR \
+	reboot_interrupt smp_reboot_interrupt
 #endif
 
 #ifdef CONFIG_X86_UV
@@ -907,10 +898,15 @@ apicinterrupt INVALIDATE_TLB_VECTOR_STAR
 #endif
 
 apicinterrupt THRESHOLD_APIC_VECTOR \
-	threshold_interrupt mce_threshold_interrupt
+	threshold_interrupt smp_threshold_interrupt
 apicinterrupt THERMAL_APIC_VECTOR \
 	thermal_interrupt smp_thermal_interrupt
 
+#ifdef CONFIG_X86_MCE
+apicinterrupt MCE_SELF_VECTOR \
+	mce_self_interrupt smp_mce_self_interrupt
+#endif
+
 #ifdef CONFIG_SMP
 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
 	call_function_single_interrupt smp_call_function_single_interrupt
@@ -924,6 +920,11 @@ apicinterrupt ERROR_APIC_VECTOR \
 	error_interrupt smp_error_interrupt
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
+
+#ifdef CONFIG_PERF_COUNTERS
+apicinterrupt LOCAL_PENDING_VECTOR \
+	perf_pending_interrupt smp_perf_pending_interrupt
+#endif
 #endif /* !CONFIG_XEN */
 
 /*
@@ -1226,7 +1227,7 @@ paranoiderrorentry stack_segment do_stac
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
 #ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check do_machine_check
+paranoidzeroentry machine_check *machine_check_vector(%rip)
 #endif
 
 #ifndef CONFIG_XEN
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/head_32-xen.S	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/head_32-xen.S	2009-11-06 10:52:09.000000000 +0100
@@ -120,12 +120,6 @@ ENTRY(hypercall_page)
 	CFI_ENDPROC
 
 /*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
-
-/*
  * BSS section
  */
 .section ".bss.page_aligned","wa"
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/head_64-xen.S	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/head_64-xen.S	2009-11-06 10:52:09.000000000 +0100
@@ -15,7 +15,6 @@
 #include <linux/threads.h>
 #include <linux/init.h>
 #include <linux/elfnote.h>
-#include <asm/desc.h>
 #include <asm/segment.h>
 #include <asm/page.h>
 #include <asm/msr.h>
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/init_task.c	2010-03-22 12:07:54.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/init_task.c	2009-11-06 10:52:09.000000000 +0100
@@ -31,6 +31,7 @@ union thread_union init_thread_union __i
 struct task_struct init_task = INIT_TASK(init_task);
 EXPORT_SYMBOL(init_task);
 
+#ifndef CONFIG_X86_NO_TSS
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
  * no more per-task TSS's. The TSS size is kept cacheline-aligned
@@ -39,4 +40,4 @@ EXPORT_SYMBOL(init_task);
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
-
+#endif
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/irq-xen.c	2009-12-18 09:57:23.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/irq-xen.c	2009-12-18 09:58:56.000000000 +0100
@@ -12,6 +12,8 @@
 #include <asm/io_apic.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
+#include <asm/mce.h>
+#include <asm/hw_irq.h>
 
 atomic_t irq_err_count;
 
@@ -26,9 +28,10 @@ void (*generic_interrupt_extension)(void
  */
 void ack_bad_irq(unsigned int irq)
 {
-	printk(KERN_ERR "unexpected IRQ trap at irq %02x\n", irq);
+	if (printk_ratelimit())
+		pr_err("unexpected IRQ trap at vector %02x\n", irq);
 
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
+#ifndef CONFIG_XEN
 	/*
 	 * Currently unexpected vectors happen only on SMP and APIC.
 	 * We _must_ ack these because every local APIC has only N
@@ -38,8 +41,7 @@ void ack_bad_irq(unsigned int irq)
 	 * completely.
 	 * But only ack when the APIC is enabled -AK
 	 */
-	if (cpu_has_apic)
-		ack_APIC_irq();
+	ack_APIC_irq();
 #endif
 }
 
@@ -65,6 +67,14 @@ static int show_other_interrupts(struct 
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
 	seq_printf(p, "  Spurious interrupts\n");
+	seq_printf(p, "%*s: ", prec, "CNT");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+	seq_printf(p, "  Performance counter interrupts\n");
+	seq_printf(p, "%*s: ", prec, "PND");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
+	seq_printf(p, "  Performance pending work\n");
 #endif
 #ifndef CONFIG_XEN
 	if (generic_interrupt_extension) {
@@ -90,17 +100,27 @@ static int show_other_interrupts(struct 
 	seq_printf(p, "  TLB shootdowns\n");
 #endif
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	seq_printf(p, "%*s: ", prec, "TRM");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
 	seq_printf(p, "  Thermal event interrupts\n");
-# ifdef CONFIG_X86_64
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
 	seq_printf(p, "%*s: ", prec, "THR");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
 	seq_printf(p, "  Threshold APIC interrupts\n");
-# endif
+#endif
+#ifdef CONFIG_X86_NEW_MCE
+	seq_printf(p, "%*s: ", prec, "MCE");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
+	seq_printf(p, "  Machine check exceptions\n");
+	seq_printf(p, "%*s: ", prec, "MCP");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
+	seq_printf(p, "  Machine check polls\n");
 #endif
 	seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
 #if defined(CONFIG_X86_IO_APIC)
@@ -172,6 +192,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #ifdef CONFIG_X86_LOCAL_APIC
 	sum += irq_stats(cpu)->apic_timer_irqs;
 	sum += irq_stats(cpu)->irq_spurious_count;
+	sum += irq_stats(cpu)->apic_perf_irqs;
+	sum += irq_stats(cpu)->apic_pending_irqs;
 #endif
 #ifndef CONFIG_XEN
 	if (generic_interrupt_extension)
@@ -184,11 +206,15 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->irq_tlb_count;
 #endif
 #endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
 	sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_64
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
 	sum += irq_stats(cpu)->irq_threshold_count;
 #endif
+#ifdef CONFIG_X86_NEW_MCE
+	sum += per_cpu(mce_exception_count, cpu);
+	sum += per_cpu(mce_poll_count, cpu);
 #endif
 	return sum;
 }
@@ -224,14 +250,11 @@ unsigned int __irq_entry do_IRQ(struct p
 	irq = __get_cpu_var(vector_irq)[vector];
 
 	if (!handle_irq(irq, regs)) {
-#ifdef CONFIG_X86_64
-		if (!disable_apic)
-			ack_APIC_irq();
-#endif
+		ack_APIC_irq();
 
 		if (printk_ratelimit())
-			printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
-			       __func__, smp_processor_id(), vector, irq);
+			pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
+				__func__, smp_processor_id(), vector, irq);
 	}
 
 	irq_exit();
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/microcode_core-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/microcode_core-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -22,27 +22,21 @@
  *	2 of the License, or (at your option) any later version.
  */
 #include <linux/platform_device.h>
-#include <linux/capability.h>
 #include <linux/miscdevice.h>
-#include <linux/firmware.h>
+#include <linux/capability.h>
 #include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/firmware.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
-#include <asm/msr.h>
 
 MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -53,7 +47,18 @@ module_param(verbose, int, 0644);
 
 #define MICROCODE_VERSION	"2.00-xen"
 
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+/*
+ * Synchronization.
+ *
+ * All non cpu-hotplug-callback call sites use:
+ *
+ * - microcode_mutex to synchronize with each other;
+ * - get/put_online_cpus() to synchronize with
+ *   the cpu-hotplug-callback call sites.
+ *
+ * We guarantee that only a single cpu is being
+ * updated at any particular moment of time.
+ */
 static DEFINE_MUTEX(microcode_mutex);
 
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
@@ -90,18 +95,16 @@ static int microcode_open(struct inode *
 static ssize_t microcode_write(struct file *file, const char __user *buf,
 			       size_t len, loff_t *ppos)
 {
-	ssize_t ret;
+	ssize_t ret = -EINVAL;
 
 	if ((len >> PAGE_SHIFT) > num_physpages) {
-		printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
-		       num_physpages);
- 		return -EINVAL;
+		pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+		return ret;
  	}
 
 	mutex_lock(&microcode_mutex);
 
-	ret = do_microcode_update(buf, len);
-	if (!ret)
+	if (do_microcode_update(buf, len) == 0)
 		ret = (ssize_t)len;
 
 	mutex_unlock(&microcode_mutex);
@@ -110,15 +113,16 @@ static ssize_t microcode_write(struct fi
 }
 
 static const struct file_operations microcode_fops = {
-	.owner		= THIS_MODULE,
-	.write		= microcode_write,
-	.open		= microcode_open,
+	.owner			= THIS_MODULE,
+	.write			= microcode_write,
+	.open			= microcode_open,
 };
 
 static struct miscdevice microcode_dev = {
-	.minor		= MICROCODE_MINOR,
-	.name		= "microcode",
-	.fops		= &microcode_fops,
+	.minor			= MICROCODE_MINOR,
+	.name			= "microcode",
+	.devnode		= "cpu/microcode",
+	.fops			= &microcode_fops,
 };
 
 static int __init microcode_dev_init(void)
@@ -127,9 +131,7 @@ static int __init microcode_dev_init(voi
 
 	error = misc_register(&microcode_dev);
 	if (error) {
-		printk(KERN_ERR
-			"microcode: can't misc_register on minor=%d\n",
-			MICROCODE_MINOR);
+		pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
 		return error;
 	}
 
@@ -188,38 +190,35 @@ static int __init microcode_init(void)
 	else if (c->x86_vendor == X86_VENDOR_AMD)
 		fw_name = "amd-ucode/microcode_amd.bin";
 	else {
-		printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+		pr_err("microcode: no support for this CPU vendor\n");
 		return -ENODEV;
 	}
 
-	error = microcode_dev_init();
-	if (error)
-		return error;
 	microcode_pdev = platform_device_register_simple("microcode", -1,
 							 NULL, 0);
 	if (IS_ERR(microcode_pdev)) {
-		microcode_dev_exit();
 		return PTR_ERR(microcode_pdev);
 	}
 
+	error = microcode_dev_init();
+	if (error)
+		return error;
+
 	request_microcode(fw_name);
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
 	       " <tigran@aivazian.fsnet.co.uk>,"
 	       " Peter Oruba\n");
 
 	return 0;
 }
+module_init(microcode_init);
 
 static void __exit microcode_exit(void)
 {
 	microcode_dev_exit();
 	platform_device_unregister(microcode_pdev);
 
-	printk(KERN_INFO
-	       "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+	pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
 }
-
-module_init(microcode_init);
 module_exit(microcode_exit);
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/mpparse-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/mpparse-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -17,6 +17,7 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/smp.h>
+#include <linux/pci.h>
 
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
@@ -890,24 +891,17 @@ static
 inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
 #endif /* CONFIG_X86_IO_APIC */
 
-static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
-		      int count)
+static int
+check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 {
-	if (!mpc_new_phys) {
-		pr_info("No spare slots, try to append...take your risk, "
-			"new mpc_length %x\n", count);
-	} else {
-		if (count <= mpc_new_length)
-			pr_info("No spare slots, try to append..., "
-				"new mpc_length %x\n", count);
-		else {
-			pr_err("mpc_new_length %lx is too small\n",
-				mpc_new_length);
-			return -1;
-		}
+	int ret = 0;
+
+	if (!mpc_new_phys || count <= mpc_new_length) {
+		WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
+		return -1;
 	}
 
-	return 0;
+	return ret;
 }
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
@@ -966,7 +960,7 @@ static int  __init replace_intsrc_all(st
 		} else {
 			struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
 			count += sizeof(struct mpc_intsrc);
-			if (!check_slot(mpc_new_phys, mpc_new_length, count))
+			if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
 				goto out;
 			assign_to_mpc_intsrc(&mp_irqs[i], m);
 			mpc->length = count;
@@ -983,11 +977,14 @@ out:
 	return 0;
 }
 
-static int __initdata enable_update_mptable;
+int enable_update_mptable;
 
 static int __init update_mptable_setup(char *str)
 {
 	enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+	pci_routeirq = 1;
+#endif
 	return 0;
 }
 early_param("update_mptable", update_mptable_setup);
@@ -1000,6 +997,9 @@ static int __initdata alloc_mptable;
 static int __init parse_alloc_mptable_opt(char *p)
 {
 	enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+	pci_routeirq = 1;
+#endif
 	alloc_mptable = 1;
 	if (!p)
 		return 0;
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/pci-dma-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/pci-dma-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -32,6 +32,8 @@ int no_iommu __read_mostly;
 /* Set this to 1 if there is a HW IOMMU in the system */
 int iommu_detected __read_mostly = 0;
 
+int iommu_pass_through;
+
 dma_addr_t bad_dma_address __read_mostly = 0;
 EXPORT_SYMBOL(bad_dma_address);
 
@@ -264,6 +266,10 @@ static __init int iommu_setup(char *p)
 		if (!strncmp(p, "soft", 4))
 			swiotlb = 1;
 #endif
+		if (!strncmp(p, "pt", 2)) {
+			iommu_pass_through = 1;
+			return 1;
+		}
 
 		gart_parse_options(p);
 
@@ -371,6 +377,8 @@ static int __init pci_iommu_init(void)
 void pci_iommu_shutdown(void)
 {
 	gart_iommu_shutdown();
+
+	amd_iommu_shutdown();
 }
 /* Must execute after PCI subsystem */
 fs_initcall(pci_iommu_init);
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/process-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/process-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -8,12 +8,15 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
+#include <linux/random.h>
 #include <trace/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
+#include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/ds.h>
 #include <xen/evtchn.h>
 
 unsigned long idle_halt;
@@ -46,6 +49,8 @@ void free_thread_xstate(struct task_stru
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
+
+	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
 
 void free_thread_info(struct thread_info *ti)
@@ -59,7 +64,7 @@ void arch_task_cache_init(void)
         task_xstate_cachep =
         	kmem_cache_create("task_xstate", xstate_size,
 				  __alignof__(union thread_xstate),
-				  SLAB_PANIC, NULL);
+				  SLAB_PANIC | SLAB_NOTRACK, NULL);
 }
 
 /*
@@ -85,8 +90,6 @@ void exit_thread(void)
 		t->io_bitmap_max = 0;
 		kfree(bp);
 	}
-
-	ds_exit_thread(current);
 }
 
 void flush_thread(void)
@@ -500,16 +503,12 @@ static void c1e_idle(void)
 		if (!cpumask_test_cpu(cpu, c1e_mask)) {
 			cpumask_set_cpu(cpu, c1e_mask);
 			/*
-			 * Force broadcast so ACPI can not interfere. Needs
-			 * to run with interrupts enabled as it uses
-			 * smp_function_call.
+			 * Force broadcast so ACPI can not interfere.
 			 */
-			local_irq_enable();
 			clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
 					   &cpu);
 			printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
 			       cpu);
-			local_irq_disable();
 		}
 		clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
 
@@ -604,3 +603,16 @@ static int __init idle_setup(char *str)
 }
 early_param("idle", idle_setup);
 
+unsigned long arch_align_stack(unsigned long sp)
+{
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		sp -= get_random_int() % 8192;
+	return sp & ~0xf;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+	unsigned long range_end = mm->brk + 0x02000000;
+	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/process_32-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/process_32-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -9,8 +9,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
@@ -33,7 +31,6 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/personality.h>
 #include <linux/tick.h>
 #include <linux/percpu.h>
@@ -299,7 +296,8 @@ int copy_thread(unsigned long clone_flag
 		p->thread.io_bitmap_max = 0;
 	}
 
-	ds_copy_thread(p, current);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
@@ -470,7 +468,7 @@ __switch_to(struct task_struct *prev_p, 
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/* If the task has used fpu the last 5 timeslices, just do a full
 	 * restore of the math state immediately to avoid the trap; the
@@ -560,15 +558,3 @@ unsigned long get_wchan(struct task_stru
 	return 0;
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/process_64-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/process_64-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -17,8 +17,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
@@ -35,7 +33,6 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
@@ -350,7 +347,8 @@ int copy_thread(unsigned long clone_flag
 	}
         p->thread.iopl = current->thread.iopl;
 
-	ds_copy_thread(p, me);
+	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+	p->thread.ds_ctx = NULL;
 
 	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
 	p->thread.debugctlmsr = 0;
@@ -513,7 +511,7 @@ __switch_to(struct task_struct *prev_p, 
 	 * done before math_state_restore, so the TS bit is up
 	 * to date.
 	 */
-	arch_leave_lazy_cpu_mode();
+	arch_end_context_switch(next_p);
 
 	/*
 	 * Switch FS and GS.
@@ -732,15 +730,3 @@ long sys_arch_prctl(int code, unsigned l
 	return do_arch_prctl(current, code, addr);
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-		sp -= get_random_int() % 8192;
-	return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/quirks-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/quirks-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -492,5 +492,42 @@ void force_hpet_resume(void)
 		break;
 	}
 }
+#endif
+
+#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
+/* Set correct numa_node information for AMD NB functions */
+static void __init quirk_amd_nb_node(struct pci_dev *dev)
+{
+	struct pci_dev *nb_ht;
+	unsigned int devfn;
+	u32 val;
+
+	devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
+	nb_ht = pci_get_slot(dev->bus, devfn);
+	if (!nb_ht)
+		return;
+
+	pci_read_config_dword(nb_ht, 0x60, &val);
+	set_dev_node(&dev->dev, val & 7);
+	pci_dev_put(dev);
+}
 
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
+			quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK,
+			quirk_amd_nb_node);
 #endif
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/setup-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/setup-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -142,6 +142,14 @@ EXPORT_SYMBOL(xen_start_info);
 #define ARCH_SETUP
 #endif
 
+/*
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
+ * apertures, ACPI and other tables without having to play with fixmaps.
+ */
+unsigned long max_low_pfn_mapped;
+unsigned long max_pfn_mapped;
+
 RESERVE_BRK(dmi_alloc, 65536);
 
 unsigned int boot_cpu_id __read_mostly;
@@ -247,8 +255,8 @@ unsigned long mmu_cr4_features;
 unsigned long mmu_cr4_features = X86_CR4_PAE;
 #endif
 
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
+/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
+int bootloader_type, bootloader_version;
 
 /*
  * Setup options
@@ -316,6 +324,20 @@ void * __init extend_brk(size_t size, si
 	return ret;
 }
 
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+static void __init init_gbpages(void)
+{
+	if (direct_gbpages && cpu_has_gbpages)
+		printk(KERN_INFO "Using GB pages for direct mapping\n");
+	else
+		direct_gbpages = 0;
+}
+#else
+static inline void init_gbpages(void)
+{
+}
+#endif
+
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
@@ -328,15 +350,13 @@ static void __init reserve_brk(void)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-
 #define MAX_MAP_CHUNK	(NR_FIX_BTMAPS << PAGE_SHIFT)
 static void __init relocate_initrd(void)
 {
-
+#ifndef CONFIG_XEN
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 	u64 ramdisk_here;
 	unsigned long slop, clen, mapaddr;
 	char *p, *q;
@@ -391,8 +411,14 @@ static void __init relocate_initrd(void)
 		" %08llx - %08llx\n",
 		ramdisk_image, ramdisk_image + ramdisk_size - 1,
 		ramdisk_here, ramdisk_here + ramdisk_size - 1);
-}
+#else
+	printk(KERN_ERR "initrd extends beyond end of memory "
+	       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+	       __pa(xen_start_info->mod_start) + xen_start_info->mod_len,
+	       max_low_pfn_mapped << PAGE_SHIFT);
+	initrd_start = 0;
 #endif
+}
 
 static void __init reserve_initrd(void)
 {
@@ -400,7 +426,7 @@ static void __init reserve_initrd(void)
 	u64 ramdisk_image = boot_params.hdr.ramdisk_image;
 	u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
 	u64 ramdisk_end   = ramdisk_image + ramdisk_size;
-	u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 
 	if (!boot_params.hdr.type_of_loader ||
 	    !ramdisk_image || !ramdisk_size)
@@ -409,7 +435,7 @@ static void __init reserve_initrd(void)
 	unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
 	unsigned long ramdisk_size  = xen_start_info->mod_len;
 	unsigned long ramdisk_end   = ramdisk_image + ramdisk_size;
-	unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+	unsigned long end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
 
 	if (!xen_start_info->mod_start || !ramdisk_size)
 		return;		/* No initrd provided by bootloader */
@@ -442,14 +468,8 @@ static void __init reserve_initrd(void)
 		return;
 	}
 
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
 	relocate_initrd();
-#else
-	printk(KERN_ERR "initrd extends beyond end of memory "
-	       "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
-	       ramdisk_end, end_of_lowmem);
-	initrd_start = 0;
-#endif
+
 	free_early(ramdisk_image, ramdisk_end);
 }
 #else
@@ -721,6 +741,19 @@ static struct dmi_system_id __initdata b
 			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
 		},
 	},
+	{
+	/*
+	 * AMI BIOS with low memory corruption was found on Intel DG45ID board.
+	 * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
+	 * match only DMI_BOARD_NAME and see if there is more bad products
+	 * with this vendor.
+	 */
+		.callback = dmi_low_memory_corruption,
+		.ident = "AMI BIOS",
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
+		},
+	},
 #endif
 	{}
 };
@@ -785,6 +818,12 @@ void __init setup_arch(char **cmdline_p)
 #endif
 	saved_video_mode = boot_params.hdr.vid_mode;
 	bootloader_type = boot_params.hdr.type_of_loader;
+	if ((bootloader_type >> 4) == 0xe) {
+		bootloader_type &= 0xf;
+		bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
+	}
+	bootloader_version  = bootloader_type & 0xf;
+	bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
 
 #ifdef CONFIG_BLK_DEV_RAM
 	rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
@@ -967,14 +1006,22 @@ void __init setup_arch(char **cmdline_p)
 		max_low_pfn = max_pfn;
 
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+#ifndef CONFIG_XEN
+	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
+#endif
 #endif
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
 	setup_bios_corruption_check();
 #endif
 
+	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
+			max_pfn_mapped<<PAGE_SHIFT);
+
 	reserve_brk();
 
+	init_gbpages();
+
 	/* max_pfn_mapped is updated here */
 	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
 	max_pfn_mapped = max_low_pfn_mapped;
@@ -1219,24 +1266,6 @@ void __init setup_arch(char **cmdline_p)
 #if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
 
 /**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- *	Perform any necessary interrupt initialisation prior to setting up
- *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
- *	interrupts should be initialised here if the machine emulates a PC
- *	in any way.
- **/
-void __init x86_quirk_pre_intr_init(void)
-{
-	if (x86_quirks->arch_pre_intr_init) {
-		if (x86_quirks->arch_pre_intr_init())
-			return;
-	}
-	init_ISA_irqs();
-}
-
-/**
  * x86_quirk_intr_init - post gate setup interrupt initialisation
  *
  * Description:
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/smp-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/smp-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -135,11 +135,38 @@ void xen_send_call_func_ipi(const struct
  * this function calls the 'stop' function on all other CPUs in the system.
  */
 
+irqreturn_t smp_reboot_interrupt(int irq, void *dev_id)
+{
+	irq_enter();
+	stop_this_cpu(NULL);
+	irq_exit();
+
+	return IRQ_HANDLED;
+}
+
 void xen_smp_send_stop(void)
 {
 	unsigned long flags;
+	unsigned long wait;
+
+	/*
+	 * Use an own vector here because smp_call_function
+	 * does lots of things not suitable in a panic situation.
+	 * On most systems we could also use an NMI here,
+	 * but there are a few systems around where NMI
+	 * is problematic so stay with an non NMI for now
+	 * (this implies we cannot stop CPUs spinning with irq off
+	 * currently)
+	 */
+	if (num_online_cpus() > 1) {
+		xen_send_IPI_allbutself(REBOOT_VECTOR);
+
+		/* Don't wait longer than a second */
+		wait = USEC_PER_SEC;
+		while (num_online_cpus() > 1 && wait--)
+			udelay(1);
+	}
 
-	smp_call_function(stop_this_cpu, NULL, 0);
 	local_irq_save(flags);
 	disable_all_local_evtchn();
 	local_irq_restore(flags);
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/traps-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/traps-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -45,6 +45,7 @@
 #include <linux/edac.h>
 #endif
 
+#include <asm/kmemcheck.h>
 #include <asm/stacktrace.h>
 #include <asm/processor.h>
 #include <asm/debugreg.h>
@@ -53,6 +54,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/i387.h>
+#include <asm/mce.h>
 
 #include <asm/mach_traps.h>
 
@@ -64,8 +66,6 @@
 #include <asm/setup.h>
 #include <asm/traps.h>
 
-#include "cpu/mcheck/mce.h"
-
 asmlinkage int system_call(void);
 
 /* Do we ignore FPU interrupts ? */
@@ -347,6 +347,9 @@ io_check_error(unsigned char reason, str
 	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
 	show_registers(regs);
 
+	if (panic_on_io_nmi)
+		panic("NMI IOCK error: Not continuing");
+
 	/* Re-enable the IOCK line, wait for a few seconds */
 	clear_io_check_error(reason);
 }
@@ -527,6 +530,10 @@ dotraplinkage void __kprobes do_debug(st
 
 	get_debugreg(condition, 6);
 
+	/* Catch kmemcheck conditions first of all! */
+	if (condition & DR_STEP && kmemcheck_trap(regs))
+		return;
+
 	/*
 	 * The processor cleared BTF, so don't mark that we need it set.
 	 */
@@ -792,15 +799,15 @@ unsigned long patch_espfix_desc(unsigned
 
 	return new_kesp;
 }
-#else
+#endif
+
 asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
 {
 }
 
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
 {
 }
-#endif
 #endif /* CONFIG_XEN */
 
 /*
@@ -834,9 +841,6 @@ asmlinkage void math_state_restore(void)
 	}
 
 	/* NB. 'clts' is done for us by Xen during virtual trap. */
-#ifdef CONFIG_X86_32
-	restore_fpu(tsk);
-#else
 	/*
 	 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
 	 */
@@ -845,7 +849,7 @@ asmlinkage void math_state_restore(void)
 		force_sig(SIGSEGV, tsk);
 		return;
 	}
-#endif
+
 	thread->status |= TS_USEDFPU;	/* So we fnsave on switch_to() */
 	tsk->fpu_counter++;
 }
--- sle11sp1-2010-03-22.orig/arch/x86/kernel/vsyscall_64-xen.c	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/kernel/vsyscall_64-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -132,15 +132,7 @@ static __always_inline void do_vgettimeo
 			return;
 		}
 
-		/*
-		 * Surround the RDTSC by barriers, to make sure it's not
-		 * speculated to outside the seqlock critical section and
-		 * does not cause time warps:
-		 */
-		rdtsc_barrier();
 		now = vread();
-		rdtsc_barrier();
-
 		base = __vsyscall_gtod_data.clock.cycle_last;
 		mask = __vsyscall_gtod_data.clock.mask;
 		mult = __vsyscall_gtod_data.clock.mult;
--- sle11sp1-2010-03-22.orig/arch/x86/mm/fault-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/fault-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -3,40 +3,18 @@
  *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
  *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
-#include <linux/interrupt.h>
-#include <linux/mmiotrace.h>
-#include <linux/bootmem.h>
-#include <linux/compiler.h>
-#include <linux/highmem.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/vt_kern.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/errno.h>
-#include <linux/magic.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/tty.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
-
-#include <asm-generic/sections.h>
-
-#include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
-#include <asm/segment.h>
-#include <asm/system.h>
-#include <asm/proto.h>
-#include <asm/traps.h>
-#include <asm/desc.h>
+#include <linux/magic.h>		/* STACK_END_MAGIC		*/
+#include <linux/sched.h>		/* test_thread_flag(), ...	*/
+#include <linux/kdebug.h>		/* oops_begin/end, ...		*/
+#include <linux/module.h>		/* search_exception_table	*/
+#include <linux/bootmem.h>		/* max_low_pfn			*/
+#include <linux/kprobes.h>		/* __kprobes, ...		*/
+#include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
+#include <linux/perf_counter.h>		/* perf_swcounter_event		*/
+
+#include <asm/traps.h>			/* dotraplinkage, ...		*/
+#include <asm/pgalloc.h>		/* pgd_*(), ...			*/
+#include <asm/kmemcheck.h>		/* kmemcheck_*(), ...		*/
 
 /*
  * Page fault error code bits:
@@ -225,10 +203,7 @@ static inline pmd_t *vmalloc_sync_one(pg
 	if (!pmd_present(*pmd_k))
 		return NULL;
 
-	if (!pmd_present(*pmd)) {
-		bool lazy = percpu_read(xen_lazy_mmu);
-
-		percpu_write(xen_lazy_mmu, false);
+	if (!pmd_present(*pmd))
 #if CONFIG_XEN_COMPAT > 0x030002
 		set_pmd(pmd, *pmd_k);
 #else
@@ -238,10 +213,8 @@ static inline pmd_t *vmalloc_sync_one(pg
 		 */
 		set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
 #endif
-		percpu_write(xen_lazy_mmu, lazy);
-	} else {
+	else
 		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-	}
 
 	return pmd_k;
 }
@@ -463,10 +436,11 @@ static noinline int vmalloc_fault(unsign
 }
 
 static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+KERN_ERR
+"******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+"******* Working around it, but it may cause SEGVs or burn power.\n"
+"******* Please consider a BIOS update.\n"
+"******* Disabling USB legacy in the BIOS may also help.\n";
 
 /*
  * No vm86 mode in 64-bit mode:
@@ -551,8 +525,6 @@ bad:
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_64
-	static int once;
-
 	if (address != regs->ip)
 		return 0;
 
@@ -562,10 +534,7 @@ static int is_errata93(struct pt_regs *r
 	address |= 0xffffffffUL << 32;
 	if ((address >= (u64)_stext && address <= (u64)_etext) ||
 	    (address >= MODULES_VADDR && address <= MODULES_END)) {
-		if (!once) {
-			printk(errata93_warning);
-			once = 1;
-		}
+		printk_once(errata93_warning);
 		regs->ip = address;
 		return 1;
 	}
@@ -738,7 +707,7 @@ show_signal_msg(struct pt_regs *regs, un
 	if (!printk_ratelimit())
 		return;
 
-	printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+	printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
 		task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
 		tsk->comm, task_pid_nr(tsk), address,
 		(void *)regs->ip, (void *)regs->sp, error_code);
@@ -1000,11 +969,17 @@ do_page_fault(struct pt_regs *regs, unsi
 	tsk = current;
 	mm = tsk->mm;
 
-	prefetchw(&mm->mmap_sem);
-
 	/* Get the faulting address: */
 	address = read_cr2();
 
+	/*
+	 * Detect and handle instructions that would cause a page fault for
+	 * both a tracked kernel page and a userspace page.
+	 */
+	if (kmemcheck_active(regs))
+		kmemcheck_hide(regs);
+	prefetchw(&mm->mmap_sem);
+
 	if (unlikely(kmmio_fault(regs, address)))
 		return;
 
@@ -1033,9 +1008,13 @@ do_page_fault(struct pt_regs *regs, unsi
 			return;
 		}
 
-		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
-		    vmalloc_fault(address) >= 0)
-			return;
+		if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+			if (vmalloc_fault(address) >= 0)
+				return;
+
+			if (kmemcheck_fault(regs, address, error_code))
+				return;
+		}
 
 		/* Can handle a stale RO->RW TLB: */
 		if (spurious_fault(error_code, address))
@@ -1074,6 +1053,8 @@ do_page_fault(struct pt_regs *regs, unsi
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
+	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+
 	/*
 	 * If we're in an interrupt, have no user context or are running
 	 * in an atomic region then we must not take the fault:
@@ -1160,17 +1141,22 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault:
 	 */
-	fault = handle_mm_fault(mm, vma, address, write);
+	fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
 
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		mm_fault_error(regs, error_code, address, fault);
 		return;
 	}
 
-	if (fault & VM_FAULT_MAJOR)
+	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-	else
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
+	} else {
 		tsk->min_flt++;
+		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
+	}
 
 	check_v8086_mode(regs, address, tsk);
 
--- sle11sp1-2010-03-22.orig/arch/x86/mm/highmem_32-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/highmem_32-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte-idx)));
 	set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
-	/*arch_flush_lazy_mmu_mode();*/
 
 	return (void *)vaddr;
 }
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km
 #endif
 	}
 
-	/*arch_flush_lazy_mmu_mode();*/
 	pagefault_enable();
 }
 
@@ -150,6 +148,7 @@ EXPORT_SYMBOL(kmap);
 EXPORT_SYMBOL(kunmap);
 EXPORT_SYMBOL(kmap_atomic);
 EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kmap_atomic_prot);
 #ifdef CONFIG_HIGHPTE
 EXPORT_SYMBOL(kmap_atomic_to_page);
 #endif
--- sle11sp1-2010-03-22.orig/arch/x86/mm/init-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/init-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -1,3 +1,4 @@
+#include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
 #include <linux/bootmem.h>
@@ -11,6 +12,10 @@
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/proto.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 unsigned long __meminitdata e820_table_start;
 unsigned long __meminitdata e820_table_end;
@@ -31,6 +36,69 @@ extern unsigned long extend_init_mapping
 extern void xen_finish_init_mapping(void);
 #endif
 
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on      Enable
+ * off     Disable
+ */
+static int __init noexec_setup(char *str)
+{
+	if (!str)
+		return -EINVAL;
+	if (!strncmp(str, "on", 2)) {
+		__supported_pte_mask |= _PAGE_NX;
+		disable_nx = 0;
+	} else if (!strncmp(str, "off", 3)) {
+		disable_nx = 1;
+		__supported_pte_mask &= ~_PAGE_NX;
+	}
+	return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+static void __init set_nx(void)
+{
+	unsigned int v[4], l, h;
+
+	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+		if ((v[3] & (1 << 20)) && !disable_nx) {
+			rdmsr(MSR_EFER, l, h);
+			l |= EFER_NX;
+			wrmsr(MSR_EFER, l, h);
+			nx_enabled = 1;
+			__supported_pte_mask |= _PAGE_NX;
+		}
+	}
+}
+#else
+static inline void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+	unsigned long efer;
+
+	rdmsrl(MSR_EFER, efer);
+	if (!(efer & EFER_NX) || disable_nx)
+		__supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
 static void __init find_early_table_space(unsigned long end, int use_pse,
 					  int use_gbpages)
 {
@@ -127,20 +195,6 @@ static int __meminit save_mr(struct map_
 	return nr_range;
 }
 
-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
-static void __init init_gbpages(void)
-{
-	if (direct_gbpages && cpu_has_gbpages)
-		printk(KERN_INFO "Using GB pages for direct mapping\n");
-	else
-		direct_gbpages = 0;
-}
-#else
-static inline void init_gbpages(void)
-{
-}
-#endif
-
 /*
  * Setup the direct mapping of the physical memory at PAGE_OFFSET.
  * This runs before bootmem is initialized and gets pages directly from
@@ -160,10 +214,7 @@ unsigned long __init_refok init_memory_m
 
 	printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
 
-	if (!after_bootmem)
-		init_gbpages();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
 	/*
 	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
 	 * This will simplify cpa(), which otherwise needs to support splitting
@@ -175,12 +226,9 @@ unsigned long __init_refok init_memory_m
 	use_gbpages = direct_gbpages;
 #endif
 
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_PAE
 	set_nx();
 	if (nx_enabled)
 		printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
 
 	/* Enable PSE if available */
 	if (cpu_has_pse)
@@ -191,7 +239,6 @@ unsigned long __init_refok init_memory_m
 		set_in_cr4(X86_CR4_PGE);
 		__supported_pte_mask |= _PAGE_GLOBAL;
 	}
-#endif
 
 	if (use_gbpages)
 		page_size_mask |= 1 << PG_LEVEL_1G;
--- sle11sp1-2010-03-22.orig/arch/x86/mm/init_32-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/init_32-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -52,12 +52,9 @@
 #include <asm/swiotlb.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/page_types.h>
 #include <asm/init.h>
 
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
 static noinline int do_test_wp_bit(void);
@@ -122,7 +119,7 @@ static pte_t * __init one_page_table_ini
 		pte_t *page_table = NULL;
 
 		if (after_bootmem) {
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
 			page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
 #endif
 			if (!page_table)
@@ -569,7 +566,7 @@ static inline void save_pg_dir(void)
 }
 #endif /* !CONFIG_ACPI_SLEEP */
 
-void zap_low_mappings(void)
+void zap_low_mappings(bool early)
 {
 	int i;
 
@@ -586,64 +583,16 @@ void zap_low_mappings(void)
 		set_pgd(swapper_pg_dir+i, __pgd(0));
 #endif
 	}
-	flush_tlb_all();
-}
 
-int nx_enabled;
+	if (early)
+		__flush_tlb();
+	else
+		flush_tlb_all();
+}
 
 pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-#ifdef CONFIG_X86_PAE
-
-static int disable_nx __initdata;
-
-/*
- * noexec = on|off
- *
- * Control non executable mappings.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-	if (!str || !strcmp(str, "on")) {
-		if (cpu_has_nx) {
-			__supported_pte_mask |= _PAGE_NX;
-			disable_nx = 0;
-		}
-	} else {
-		if (!strcmp(str, "off")) {
-			disable_nx = 1;
-			__supported_pte_mask &= ~_PAGE_NX;
-		} else {
-			return -EINVAL;
-		}
-	}
-
-	return 0;
-}
-early_param("noexec", noexec_setup);
-
-void __init set_nx(void)
-{
-	unsigned int v[4], l, h;
-
-	if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-		cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-		if ((v[3] & (1 << 20)) && !disable_nx) {
-			rdmsr(MSR_EFER, l, h);
-			l |= EFER_NX;
-			wrmsr(MSR_EFER, l, h);
-			nx_enabled = 1;
-			__supported_pte_mask |= _PAGE_NX;
-		}
-	}
-}
-#endif
-
 /* user-defined highmem size */
 static unsigned int highmem_pages = -1;
 
@@ -763,15 +712,15 @@ void __init initmem_init(unsigned long s
 	highstart_pfn = highend_pfn = max_pfn;
 	if (max_pfn > max_low_pfn)
 		highstart_pfn = max_low_pfn;
-	memory_present(0, 0, highend_pfn);
 	e820_register_active_regions(0, 0, highend_pfn);
+	sparse_memory_present_with_active_regions(0);
 	printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
 		pages_to_mb(highend_pfn - highstart_pfn));
 	num_physpages = highend_pfn;
 	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-	memory_present(0, 0, max_low_pfn);
 	e820_register_active_regions(0, 0, max_low_pfn);
+	sparse_memory_present_with_active_regions(0);
 	num_physpages = max_low_pfn;
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
@@ -1074,7 +1023,7 @@ void __init mem_init(void)
 		test_wp_bit();
 
 	save_pg_dir();
-	zap_low_mappings();
+	zap_low_mappings(true);
 
 	SetPagePinned(virt_to_page(init_mm.pgd));
 }
--- sle11sp1-2010-03-22.orig/arch/x86/mm/init_64-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/init_64-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -56,21 +56,11 @@
 
 #include <xen/features.h>
 
-/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
 #if CONFIG_XEN_COMPAT <= 0x030002
 unsigned int __kernel_page_user;
 EXPORT_SYMBOL(__kernel_page_user);
 #endif
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
 extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
 
@@ -151,39 +141,6 @@ early_param("gbpages", parse_direct_gbpa
 pteval_t __supported_pte_mask __read_mostly = ~0UL;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec=on|off
- * Control non-executable mappings for 64-bit processes.
- *
- * on	Enable (default)
- * off	Disable
- */
-static int __init nonx_setup(char *str)
-{
-	if (!str)
-		return -EINVAL;
-	if (!strncmp(str, "on", 2)) {
-		__supported_pte_mask |= _PAGE_NX;
-		disable_nx = 0;
-	} else if (!strncmp(str, "off", 3)) {
-		disable_nx = 1;
-		__supported_pte_mask &= ~_PAGE_NX;
-	}
-	return 0;
-}
-early_param("noexec", nonx_setup);
-
-void __cpuinit check_efer(void)
-{
-	unsigned long efer;
-
-	rdmsrl(MSR_EFER, efer);
-	if (!(efer & EFER_NX) || disable_nx)
-		__supported_pte_mask &= ~_PAGE_NX;
-}
-
 int force_personality32;
 
 /*
@@ -213,7 +170,7 @@ static __ref void *spp_getpage(void)
 	void *ptr;
 
 	if (after_bootmem)
-		ptr = (void *) get_zeroed_page(GFP_ATOMIC);
+		ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
 	else if (e820_table_end < e820_table_top) {
 		ptr = __va(e820_table_end << PAGE_SHIFT);
 		e820_table_end++;
@@ -399,7 +356,7 @@ static __ref void *alloc_low_page(unsign
 	void *adr;
 
 	if (after_bootmem) {
-		adr = (void *)get_zeroed_page(GFP_ATOMIC);
+		adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
 		*phys = __pa(adr);
 
 		return adr;
@@ -804,7 +761,7 @@ void __init xen_finish_init_mapping(void
 		e820_table_top = e820_table_end;
 }
 
-unsigned long __init
+unsigned long __meminit
 kernel_physical_mapping_init(unsigned long start,
 			     unsigned long end,
 			     unsigned long page_size_mask)
@@ -873,6 +830,7 @@ void __init initmem_init(unsigned long s
 	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
 	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
+#endif
 
 void __init paging_init(void)
 {
@@ -883,13 +841,21 @@ void __init paging_init(void)
 	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
 	max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
-	memory_present(0, 0, max_pfn);
+	sparse_memory_present_with_active_regions(MAX_NUMNODES);
 	sparse_init();
+
+	/*
+	 * clear the default setting with node 0
+	 * note: don't use nodes_clear here, that is really clearing when
+	 *	 numa support is not compiled in, and later node_set_state
+	 *	 will not set it back.
+	 */
+	node_clear_state(0, N_NORMAL_MEMORY);
+
 	free_area_init_nodes(max_zone_pfns);
 
 	SetPagePinned(virt_to_page(init_mm.pgd));
 }
-#endif
 
 /*
  * Memory hotplug specific functions
@@ -1084,7 +1050,7 @@ int __init reserve_bootmem_generic(unsig
 		return ret;
 
 #else
-	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+	reserve_bootmem(phys, len, flags);
 #endif
 
 #ifndef CONFIG_XEN
--- sle11sp1-2010-03-22.orig/arch/x86/mm/iomap_32-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/iomap_32-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -84,7 +84,6 @@ iounmap_atomic(void *kvaddr, enum km_typ
 	if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
 		kpte_clear_flush(kmap_pte-idx, vaddr);
 
-	/*arch_flush_lazy_mmu_mode();*/
 	pagefault_enable();
 }
 EXPORT_SYMBOL_GPL(iounmap_atomic);
--- sle11sp1-2010-03-22.orig/arch/x86/mm/pageattr-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/pageattr-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/pfn.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -486,7 +487,7 @@ static int split_large_page(pte_t *kpte,
 
 	if (!debug_pagealloc)
 		spin_unlock(&cpa_lock);
-	base = alloc_pages(GFP_KERNEL, 0);
+	base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
 	if (!debug_pagealloc)
 		spin_lock(&cpa_lock);
 	if (!base)
@@ -610,9 +611,12 @@ static int __change_page_attr(struct cpa
 	unsigned int level;
 	pte_t *kpte, old_pte;
 
-	if (cpa->flags & CPA_PAGES_ARRAY)
-		address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
-	else if (cpa->flags & CPA_ARRAY)
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[cpa->curpage];
+		if (unlikely(PageHighMem(page)))
+			return 0;
+		address = (unsigned long)page_address(page);
+	} else if (cpa->flags & CPA_ARRAY)
 		address = cpa->vaddr[cpa->curpage];
 	else
 		address = *cpa->vaddr;
@@ -702,8 +706,9 @@ static int __change_page_attr_set_clr(st
 static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
-	int ret = 0;
-	unsigned long temp_cpa_vaddr, vaddr;
+	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+	unsigned long vaddr, remapped;
+	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
 		return 0;
@@ -716,9 +721,12 @@ static int cpa_process_alias(struct cpa_
 	 * No need to redo, when the primary call touched the direct
 	 * mapping already:
 	 */
-	if (cpa->flags & CPA_PAGES_ARRAY)
-		vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
-	else if (cpa->flags & CPA_ARRAY)
+	if (cpa->flags & CPA_PAGES_ARRAY) {
+		struct page *page = cpa->pages[cpa->curpage];
+		if (unlikely(PageHighMem(page)))
+			return 0;
+		vaddr = (unsigned long)page_address(page);
+	} else if (cpa->flags & CPA_ARRAY)
 		vaddr = cpa->vaddr[cpa->curpage];
 	else
 		vaddr = *cpa->vaddr;
@@ -727,42 +735,55 @@ static int cpa_process_alias(struct cpa_
 		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
 
 		alias_cpa = *cpa;
-		temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
-		alias_cpa.vaddr = &temp_cpa_vaddr;
+		alias_cpa.vaddr = &laddr;
 		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
-
 		ret = __change_page_attr_set_clr(&alias_cpa, 0);
+		if (ret)
+			return ret;
 	}
 
 #ifdef CONFIG_X86_64
-	if (ret)
-		return ret;
-	/*
-	 * No need to redo, when the primary call touched the high
-	 * mapping already:
-	 */
-	if (within(vaddr, (unsigned long) _text, _brk_end))
-		return 0;
-
 	/*
-	 * If the physical address is inside the kernel map, we need
+	 * If the primary call didn't touch the high mapping already
+	 * and the physical address is inside the kernel map, we need
 	 * to touch the high mapped kernel as well:
 	 */
-	if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
-		return 0;
+	if (!within(vaddr, (unsigned long)_text, _brk_end) &&
+	    within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
+		unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
+					       __START_KERNEL_map;
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &temp_cpa_vaddr;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 
-	alias_cpa = *cpa;
-	temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map;
-	alias_cpa.vaddr = &temp_cpa_vaddr;
-	alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+		/*
+		 * The high mapping range is imprecise, so ignore the
+		 * return value.
+		 */
+		__change_page_attr_set_clr(&alias_cpa, 0);
+	}
+#endif
 
 	/*
-	 * The high mapping range is imprecise, so ignore the return value.
-	 */
-	__change_page_attr_set_clr(&alias_cpa, 0);
-#endif
-	return ret;
+	 * If the PMD page was partially used for per-cpu remapping,
+	 * the recycled area needs to be split and modified.  Because
+	 * the area is always proper subset of a PMD page
+	 * cpa->numpages is guaranteed to be 1 for these areas, so
+	 * there's no need to loop over and check for further remaps.
+	 */
+	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
+	if (remapped) {
+		WARN_ON(cpa->numpages > 1);
+		alias_cpa = *cpa;
+		alias_cpa.vaddr = &remapped;
+		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+		ret = __change_page_attr_set_clr(&alias_cpa, 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
@@ -860,15 +881,6 @@ static int change_page_attr_set_clr(unsi
 
 	vm_unmap_aliases();
 
-	/*
-	 * If we're called with lazy mmu updates enabled, the
-	 * in-memory pte state may be stale.  Flush pending updates to
-	 * bring them up to date.
-	 *
-	arch_flush_lazy_mmu_mode();*/
-	if (arch_use_lazy_mmu_mode())
-		xen_multicall_flush(true);
-
 	cpa.vaddr = addr;
 	cpa.pages = pages;
 	cpa.numpages = numpages;
@@ -913,14 +925,6 @@ static int change_page_attr_set_clr(unsi
 	} else
 		cpa_flush_all(cache);
 
-	/*
-	 * If we've been called with lazy mmu updates enabled, then
-	 * make sure that everything gets flushed out before we
-	 * return.
-	 *
-	arch_flush_lazy_mmu_mode();*/
-	WARN_ON_ONCE(arch_use_lazy_mmu_mode() && !irq_count());
-
 out:
 	return ret;
 }
@@ -1065,12 +1069,15 @@ EXPORT_SYMBOL(set_memory_array_uc);
 int _set_memory_wc(unsigned long addr, int numpages)
 {
 	int ret;
+	unsigned long addr_copy = addr;
+
 	ret = change_page_attr_set(&addr, numpages,
 				    __pgprot(_PAGE_CACHE_UC_MINUS), 0);
-
 	if (!ret) {
-		ret = change_page_attr_set(&addr, numpages,
-				    __pgprot(_PAGE_CACHE_WC), 0);
+		ret = change_page_attr_set_clr(&addr_copy, numpages,
+					       __pgprot(_PAGE_CACHE_WC),
+					       __pgprot(_PAGE_CACHE_MASK),
+					       0, 0, NULL);
 	}
 	return ret;
 }
@@ -1187,7 +1194,9 @@ int set_pages_array_uc(struct page **pag
 	int free_idx;
 
 	for (i = 0; i < addrinarray; i++) {
-		start = (unsigned long)page_address(pages[i]);
+		if (PageHighMem(pages[i]))
+			continue;
+		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
 		end = start + PAGE_SIZE;
 		if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
 			goto err_out;
@@ -1200,7 +1209,9 @@ int set_pages_array_uc(struct page **pag
 err_out:
 	free_idx = i;
 	for (i = 0; i < free_idx; i++) {
-		start = (unsigned long)page_address(pages[i]);
+		if (PageHighMem(pages[i]))
+			continue;
+		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
 		end = start + PAGE_SIZE;
 		free_memtype(start, end);
 	}
@@ -1229,7 +1240,9 @@ int set_pages_array_wb(struct page **pag
 		return retval;
 
 	for (i = 0; i < addrinarray; i++) {
-		start = (unsigned long)page_address(pages[i]);
+		if (PageHighMem(pages[i]))
+			continue;
+		start = page_to_pfn(pages[i]) << PAGE_SHIFT;
 		end = start + PAGE_SIZE;
 		free_memtype(start, end);
 	}
--- sle11sp1-2010-03-22.orig/arch/x86/mm/pat-xen.c	2010-02-05 11:17:13.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/pat-xen.c	2010-02-05 11:17:21.000000000 +0100
@@ -639,7 +639,8 @@ static int reserve_pfn_range(u64 paddr, 
 		return ret;
 
 	if (flags != want_flags) {
-		if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
+		if (strict_prot ||
+		    !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
 			free_memtype(paddr, paddr + size);
 			printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
 				" for %Lx-%Lx, got %s\n",
--- sle11sp1-2010-03-22.orig/arch/x86/mm/pgtable-xen.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/mm/pgtable-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -8,9 +8,11 @@
 #include <asm/hypervisor.h>
 #include <asm/mmu_context.h>
 
+#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	pte_t *pte = (pte_t *)__get_free_page(PGALLOC_GFP);
 	if (pte)
 		make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
 	return pte;
@@ -27,9 +29,9 @@ pgtable_t pte_alloc_one(struct mm_struct
 	struct page *pte;
 
 #ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
 #else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pte = alloc_pages(PGALLOC_GFP, 0);
 #endif
 	if (pte) {
 		pgtable_page_ctor(pte);
@@ -65,7 +67,7 @@ void __pte_free(pgtable_t pte)
 	__free_page(pte);
 }
 
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
 	pgtable_page_dtor(pte);
 	paravirt_release_pte(page_to_pfn(pte));
@@ -83,7 +85,7 @@ pmd_t *pmd_alloc_one(struct mm_struct *m
 {
 	struct page *pmd;
 
-	pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	pmd = alloc_pages(PGALLOC_GFP, 0);
 	if (!pmd)
 		return NULL;
 	SetPageForeign(pmd, _pmd_free);
@@ -107,14 +109,14 @@ void __pmd_free(pgtable_t pmd)
 	__free_page(pmd);
 }
 
-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
 	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pmd));
 }
 
 #if PAGETABLE_LEVELS > 3
-void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
 	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pud));
@@ -609,7 +611,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	pmd_t *pmds[PREALLOCATED_PMDS];
 	unsigned long flags;
 
-	pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
+	pgd = (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ORDER);
 
 	if (pgd == NULL)
 		goto out;
--- sle11sp1-2010-03-22.orig/arch/x86/pci/i386.c	2010-03-22 12:07:54.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/pci/i386.c	2009-11-06 10:52:09.000000000 +0100
@@ -228,12 +228,14 @@ void __init pcibios_resource_survey(void
 	pcibios_allocate_resources(1);
 
 	e820_reserve_resources_late();
+#ifndef CONFIG_XEN
 	/*
 	 * Insert the IO APIC resources after PCI initialization has
 	 * occured to handle IO APICS that are mapped in on a BAR in
 	 * PCI space, but before trying to assign unassigned pci res.
 	 */
 	ioapic_insert_resources();
+#endif
 }
 
 /**
--- sle11sp1-2010-03-22.orig/arch/x86/pci/irq-xen.c	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/pci/irq-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -895,6 +895,9 @@ static int pcibios_lookup_irq(struct pci
 		return 0;
 	}
 
+	if (io_apic_assign_pci_irqs)
+		return 0;
+
 	/* Find IRQ routing entry */
 
 	if (!pirq_table)
@@ -1045,56 +1048,15 @@ static void __init pcibios_fixup_irqs(vo
 		pirq_penalty[dev->irq]++;
 	}
 
+	if (io_apic_assign_pci_irqs)
+		return;
+
 	dev = NULL;
 	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
 		pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
 		if (!pin)
 			continue;
 
-#ifdef CONFIG_X86_IO_APIC
-		/*
-		 * Recalculate IRQ numbers if we use the I/O APIC.
-		 */
-		if (io_apic_assign_pci_irqs) {
-			int irq;
-
-			/*
-			 * interrupt pins are numbered starting from 1
-			 */
-			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-				PCI_SLOT(dev->devfn), pin - 1);
-			/*
-			 * Busses behind bridges are typically not listed in the
-			 * MP-table.  In this case we have to look up the IRQ
-			 * based on the parent bus, parent slot, and pin number.
-			 * The SMP code detects such bridged busses itself so we
-			 * should get into this branch reliably.
-			 */
-			if (irq < 0 && dev->bus->parent) {
-				/* go back to the bridge */
-				struct pci_dev *bridge = dev->bus->self;
-				int bus;
-
-				pin = pci_swizzle_interrupt_pin(dev, pin);
-				bus = bridge->bus->number;
-				irq = IO_APIC_get_PCI_irq_vector(bus,
-						PCI_SLOT(bridge->devfn), pin - 1);
-				if (irq >= 0)
-					dev_warn(&dev->dev,
-						"using bridge %s INT %c to "
-							"get IRQ %d\n",
-						 pci_name(bridge),
-						 'A' + pin - 1, irq);
-			}
-			if (irq >= 0) {
-				dev_info(&dev->dev,
-					"PCI->APIC IRQ transform: INT %c "
-						"-> IRQ %d\n",
-					'A' + pin - 1, irq);
-				dev->irq = irq;
-			}
-		}
-#endif
 		/*
 		 * Still no IRQ? Try to lookup one...
 		 */
@@ -1189,6 +1151,19 @@ int __init pcibios_irq_init(void)
 	pcibios_enable_irq = pirq_enable_irq;
 
 	pcibios_fixup_irqs();
+
+	if (io_apic_assign_pci_irqs && pci_routeirq) {
+		struct pci_dev *dev = NULL;
+		/*
+		 * PCI IRQ routing is set up by pci_enable_device(), but we
+		 * also do it here in case there are still broken drivers that
+		 * don't use pci_enable_device().
+		 */
+		printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
+		for_each_pci_dev(dev)
+			pirq_enable_irq(dev);
+	}
+
 	return 0;
 }
 
@@ -1219,16 +1194,23 @@ void pcibios_penalize_isa_irq(int irq, i
 static int pirq_enable_irq(struct pci_dev *dev)
 {
 	u8 pin;
-	struct pci_dev *temp_dev;
 
 	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+	if (pin && !pcibios_lookup_irq(dev, 1)) {
 		char *msg = "";
 
+		if (!io_apic_assign_pci_irqs && dev->irq)
+			return 0;
+
 		if (io_apic_assign_pci_irqs) {
+#ifdef CONFIG_X86_IO_APIC
+			struct pci_dev *temp_dev;
 			int irq;
+			struct io_apic_irq_attr irq_attr;
 
-			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1);
+			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+						PCI_SLOT(dev->devfn),
+						pin - 1, &irq_attr);
 			/*
 			 * Busses behind bridges are typically not listed in the MP-table.
 			 * In this case we have to look up the IRQ based on the parent bus,
@@ -1241,7 +1223,8 @@ static int pirq_enable_irq(struct pci_de
 
 				pin = pci_swizzle_interrupt_pin(dev, pin);
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
-						PCI_SLOT(bridge->devfn), pin - 1);
+						PCI_SLOT(bridge->devfn),
+						pin - 1, &irq_attr);
 				if (irq >= 0)
 					dev_warn(&dev->dev, "using bridge %s "
 						 "INT %c to get IRQ %d\n",
@@ -1251,12 +1234,15 @@ static int pirq_enable_irq(struct pci_de
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
+				io_apic_set_pci_routing(&dev->dev, irq,
+							 &irq_attr);
+				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
-				dev->irq = irq;
 				return 0;
 			} else
 				msg = "; probably buggy MP table";
+#endif
 		} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
 			msg = "";
 		else
--- sle11sp1-2010-03-22.orig/arch/x86/pci/pcifront.c	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/pci/pcifront.c	2009-11-06 10:52:09.000000000 +0100
@@ -6,6 +6,7 @@
  */
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <linux/pci.h>
 #include <asm/acpi.h>
 #include <asm/pci_x86.h>
@@ -15,6 +16,7 @@ static int pcifront_enable_irq(struct pc
 {
 	u8 irq;
 	pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+	irq_to_desc_alloc_node(irq, numa_node_id());
 	evtchn_register_pirq(irq);
 	dev->irq = irq;
 
--- sle11sp1-2010-03-22.orig/arch/x86/vdso/vdso32-setup-xen.c	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-22/arch/x86/vdso/vdso32-setup-xen.c	2009-11-06 10:52:09.000000000 +0100
@@ -377,6 +377,8 @@ int arch_setup_additional_pages(struct l
 		}
 	}
 
+	current->mm->context.vdso = (void *)addr;
+
 	if (compat_uses_vma || !compat) {
 		/*
 		 * MAYWRITE to allow gdb to COW and set breakpoints
@@ -397,11 +399,13 @@ int arch_setup_additional_pages(struct l
 			goto up_fail;
 	}
 
-	current->mm->context.vdso = (void *)addr;
 	current_thread_info()->sysenter_return =
 		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
 
   up_fail:
+	if (ret)
+		current->mm->context.vdso = NULL;
+
 	up_write(&mm->mmap_sem);
 
 	return ret;
--- sle11sp1-2010-03-22.orig/drivers/acpi/processor_core.c	2010-01-04 12:40:08.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/acpi/processor_core.c	2010-01-04 12:41:22.000000000 +0100
@@ -674,7 +674,14 @@ static int acpi_processor_get_info(struc
 	 * generated as the following format:
 	 * CPU+CPU ID.
 	 */
-	sprintf(acpi_device_bid(device), "CPU%X", pr->id);
+	if (pr->id != -1)
+		sprintf(acpi_device_bid(device), "CPU%X", pr->id);
+	else
+		snprintf(acpi_device_bid(device),
+			 ARRAY_SIZE(acpi_device_bid(device)),
+			 "#%0*X",
+			 (int)ARRAY_SIZE(acpi_device_bid(device)) - 2,
+			 pr->acpi_id);
 	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id,
 			  pr->acpi_id));
 
--- sle11sp1-2010-03-22.orig/drivers/char/agp/intel-agp.c	2010-01-20 10:22:47.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/char/agp/intel-agp.c	2010-01-20 10:24:03.000000000 +0100
@@ -564,7 +564,11 @@ static struct agp_memory *alloc_agpphysm
 	new->page_count = pg_count;
 	new->num_scratch_pages = pg_count;
 	new->type = AGP_PHYS_MEMORY;
+#ifndef CONFIG_XEN
 	new->physical = page_to_phys(new->pages[0]);
+#else
+	new->physical = page_to_pseudophys(new->pages[0]);
+#endif
 	return new;
 }
 
--- sle11sp1-2010-03-22.orig/drivers/edac/Kconfig	2010-03-22 12:07:54.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/edac/Kconfig	2009-11-06 10:52:09.000000000 +0100
@@ -72,6 +72,7 @@ config EDAC_MM_EDAC
 config EDAC_AMD64
 	tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
 	depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE
+	depends on !XEN
 	help
 	  Support for error detection and correction on the AMD 64
 	  Families of Memory Controllers (K8, F10h and F11h)
--- sle11sp1-2010-03-22.orig/drivers/pci/msi-xen.c	2009-12-04 11:29:54.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/pci/msi-xen.c	2009-12-04 11:30:30.000000000 +0100
@@ -95,22 +95,17 @@ void arch_teardown_msi_irqs(struct pci_d
 }
 #endif
 
-static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
+static void msi_set_enable(struct pci_dev *dev, int pos, int enable)
 {
 	u16 control;
 
-	if (pos) {
-		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
-		control &= ~PCI_MSI_FLAGS_ENABLE;
-		if (enable)
-			control |= PCI_MSI_FLAGS_ENABLE;
-		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
-	}
-}
+	BUG_ON(!pos);
 
-static void msi_set_enable(struct pci_dev *dev, int enable)
-{
-	__msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable);
+	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
+	control &= ~PCI_MSI_FLAGS_ENABLE;
+	if (enable)
+		control |= PCI_MSI_FLAGS_ENABLE;
+	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
 }
 
 static void msix_set_enable(struct pci_dev *dev, int enable)
@@ -335,8 +330,11 @@ void pci_restore_msi_state(struct pci_de
 		return;
 
 	pci_intx_for_msi(dev, 0);
-	if (dev->msi_enabled)
-		msi_set_enable(dev, 0);
+	if (dev->msi_enabled) {
+		int pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+
+		msi_set_enable(dev, pos, 0);
+	}
 	if (dev->msix_enabled)
 		msix_set_enable(dev, 0);
 
@@ -363,9 +361,9 @@ static int msi_capability_init(struct pc
 	int pos, pirq;
 	u16 control;
 
-	msi_set_enable(dev, 0);	/* Ensure msi is disabled as I set it up */
-
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	msi_set_enable(dev, pos, 0);	/* Disable MSI during set up */
+
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
 
 	WARN_ON(nvec > 1); /* XXX */
@@ -375,7 +373,7 @@ static int msi_capability_init(struct pc
 
 	/* Set MSI enabled bits	 */
 	pci_intx_for_msi(dev, 0);
-	msi_set_enable(dev, 1);
+	msi_set_enable(dev, pos, 1);
 	dev->msi_enabled = 1;
 
 	dev->irq = pirq;
@@ -397,6 +395,7 @@ static int msix_capability_init(struct p
 {
 	u64 table_base;
 	int pirq, i, j, mapped, pos;
+	u16 control;
 	struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
 	struct msi_pirq_entry *pirq_entry;
 
@@ -406,11 +405,24 @@ static int msix_capability_init(struct p
 	msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
 
 	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
+
+	/* Ensure MSI-X is disabled while it is set up */
+	control &= ~PCI_MSIX_FLAGS_ENABLE;
+	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
+
 	table_base = find_table_base(dev, pos);
 	if (!table_base)
 		return -ENODEV;
 
-	/* MSI-X Table Initialization */
+	/*
+	 * Some devices require MSI-X to be enabled before we can touch the
+	 * MSI-X registers.  We need to mask all the vectors to prevent
+	 * interrupts coming in before they're fully set up.
+	 */
+	control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE;
+	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
+
 	for (i = 0; i < nvec; i++) {
 		mapped = 0;
 		list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) {
@@ -447,10 +459,13 @@ static int msix_capability_init(struct p
 		return avail;
 	}
 
+	/* Set MSI-X enabled bits and unmask the function */
 	pci_intx_for_msi(dev, 0);
-	msix_set_enable(dev, 1);
 	dev->msix_enabled = 1;
 
+	control &= ~PCI_MSIX_FLAGS_MASKALL;
+	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
+
 	return 0;
 }
 
@@ -572,7 +587,7 @@ EXPORT_SYMBOL(pci_enable_msi_block);
 extern void pci_frontend_disable_msi(struct pci_dev* dev);
 void pci_msi_shutdown(struct pci_dev *dev)
 {
-	int pirq;
+	int pirq, pos;
 	struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
 
 	if (!pci_msi_enable || !dev || !dev->msi_enabled)
@@ -594,7 +609,8 @@ void pci_msi_shutdown(struct pci_dev *de
 	msi_unmap_pirq(dev, pirq);
 
 	/* Disable MSI mode */
-	msi_set_enable(dev, 0);
+	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+	msi_set_enable(dev, pos, 0);
 	pci_intx_for_msi(dev, 1);
 	dev->msi_enabled = 0;
 }
@@ -634,8 +650,8 @@ int pci_msix_table_size(struct pci_dev *
  * indicates the successful configuration of MSI-X capability structure
  * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
  * Or a return of > 0 indicates that driver request is exceeding the number
- * of irqs available. Driver should use the returned value to re-send
- * its request.
+ * of irqs or MSI-X vectors available. Driver should use the returned value to
+ * re-send its request.
  **/
 extern int pci_frontend_enable_msix(struct pci_dev *dev,
 		struct msix_entry *entries, int nvec);
@@ -691,7 +707,7 @@ int pci_enable_msix(struct pci_dev* dev,
 
 	nr_entries = pci_msix_table_size(dev);
 	if (nvec > nr_entries)
-		return -EINVAL;
+		return nr_entries;
 
 	/* Check for any invalid entries */
 	for (i = 0; i < nvec; i++) {
--- sle11sp1-2010-03-22.orig/drivers/xen/Kconfig	2009-12-18 12:27:45.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/Kconfig	2009-12-18 12:27:52.000000000 +0100
@@ -371,7 +371,7 @@ config XEN_SCRUB_PAGES
 
 config XEN_DEV_EVTCHN
 	tristate "Xen /dev/xen/evtchn device"
-	depends on XEN
+	depends on PARAVIRT_XEN
 	default y
 	help
 	  The evtchn driver allows a userspace process to triger event
@@ -404,7 +404,7 @@ config XEN_COMPAT_XENFS
 
 config XEN_SYS_HYPERVISOR
        bool "Create xen entries under /sys/hypervisor"
-       depends on XEN && SYSFS
+       depends on PARAVIRT_XEN && SYSFS
        select SYS_HYPERVISOR
        default y
        help
--- sle11sp1-2010-03-22.orig/drivers/xen/Makefile	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/Makefile	2009-11-06 10:52:09.000000000 +0100
@@ -14,7 +14,9 @@ obj-$(CONFIG_XEN)		+= features.o util.o
 obj-$(CONFIG_HOTPLUG_CPU)	+= $(xen-hotplug-y)
 obj-$(CONFIG_XEN_XENCOMM)	+= $(xen-xencomm-y)
 obj-$(CONFIG_XEN_BALLOON)	+= $(xen-balloon-y)
+obj-$(CONFIG_XEN_DEV_EVTCHN)	+= evtchn.o
 obj-$(CONFIG_XENFS)		+= xenfs/
+obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
 obj-$(CONFIG_XEN_BLKDEV_TAP2)           += blktap2/
--- sle11sp1-2010-03-22.orig/drivers/xen/balloon/balloon.c	2009-11-06 10:51:42.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/balloon/balloon.c	2010-02-02 14:56:12.000000000 +0100
@@ -323,7 +323,7 @@ static int increase_reservation(unsigned
 	balloon_unlock(flags);
 
 #ifndef MODULE
-	setup_per_zone_pages_min();
+	setup_per_zone_wmarks();
 	if (rc > 0)
 		kswapd_run(0);
 	if (need_zonelists_rebuild)
--- sle11sp1-2010-03-22.orig/drivers/xen/blkback/blkback.c	2010-01-04 12:38:24.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/blkback/blkback.c	2010-03-22 12:26:12.000000000 +0100
@@ -526,7 +526,7 @@ static int dispatch_rw_block_io(blkif_t 
 
 	for (i = 0; i < nseg; i++) {
 		if (((int)preq.sector_number|(int)seg[i].nsec) &
-		    ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
+		    ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
 			DPRINTK("Misaligned I/O request from domain %d",
 				blkif->domid);
 			goto fail_put_bio;
--- sle11sp1-2010-03-22.orig/drivers/xen/blkback/vbd.c	2010-03-22 12:22:52.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/blkback/vbd.c	2009-11-06 10:52:09.000000000 +0100
@@ -47,7 +47,7 @@ unsigned int vbd_info(struct vbd *vbd)
 
 unsigned long vbd_secsize(struct vbd *vbd)
 {
-	return bdev_hardsect_size(vbd->bdev);
+	return bdev_logical_block_size(vbd->bdev);
 }
 
 int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
--- sle11sp1-2010-03-22.orig/drivers/xen/blkback/xenbus.c	2010-03-22 12:20:09.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/blkback/xenbus.c	2010-03-22 12:26:08.000000000 +0100
@@ -108,7 +108,7 @@ static void update_blkif_status(blkif_t 
 		if (!get_device(_dev))					\
 			return ret;					\
 		dev = to_xenbus_device(_dev);				\
-		if ((be = dev->dev.driver_data) != NULL)		\
+		if ((be = dev_get_drvdata(&dev->dev)) != NULL)		\
 			ret = sprintf(buf, format, ##args);		\
 		put_device(_dev);					\
 		return ret;						\
@@ -173,7 +173,7 @@ void xenvbd_sysfs_delif(struct xenbus_de
 
 static int blkback_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("");
 
@@ -194,7 +194,7 @@ static int blkback_remove(struct xenbus_
 	}
 
 	kfree(be);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
 }
 
@@ -229,7 +229,7 @@ static int blkback_probe(struct xenbus_d
 		return -ENOMEM;
 	}
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 
 	be->blkif = blkif_alloc(dev->otherend_id);
 	if (IS_ERR(be->blkif)) {
@@ -352,7 +352,7 @@ static void backend_changed(struct xenbu
 static void frontend_changed(struct xenbus_device *dev,
 			     enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	int err;
 
 	DPRINTK("%s", xenbus_strstate(frontend_state));
--- sle11sp1-2010-03-22.orig/drivers/xen/blkfront/blkfront.c	2010-03-22 12:23:52.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/blkfront/blkfront.c	2010-03-22 12:26:04.000000000 +0100
@@ -119,12 +119,12 @@ static int blkfront_probe(struct xenbus_
 
 	/* Front end dir is a number, which is used as the id. */
 	info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 
 	err = talk_to_backend(dev, info);
 	if (err) {
 		kfree(info);
-		dev->dev.driver_data = NULL;
+		dev_set_drvdata(&dev->dev, NULL);
 		return err;
 	}
 
@@ -140,7 +140,7 @@ static int blkfront_probe(struct xenbus_
  */
 static int blkfront_resume(struct xenbus_device *dev)
 {
-	struct blkfront_info *info = dev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 	int err;
 
 	DPRINTK("blkfront_resume: %s\n", dev->nodename);
@@ -265,7 +265,7 @@ fail:
 static void backend_changed(struct xenbus_device *dev,
 			    enum xenbus_state backend_state)
 {
-	struct blkfront_info *info = dev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 	struct block_device *bd;
 
 	DPRINTK("blkfront:backend_changed.\n");
@@ -434,7 +434,7 @@ static void blkfront_closing(struct blkf
 
 static int blkfront_remove(struct xenbus_device *dev)
 {
-	struct blkfront_info *info = dev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
 
@@ -664,7 +664,7 @@ static int blkif_queue_request(struct re
 	info->shadow[id].request = (unsigned long)req;
 
 	ring_req->id = id;
-	ring_req->sector_number = (blkif_sector_t)req->sector;
+	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
 	ring_req->handle = info->handle;
 
 	ring_req->operation = rq_data_dir(req) ?
@@ -720,25 +720,25 @@ void do_blkif_request(struct request_que
 
 	queued = 0;
 
-	while ((req = elv_next_request(rq)) != NULL) {
+	while ((req = blk_peek_request(rq)) != NULL) {
 		info = req->rq_disk->private_data;
-		if (!blk_fs_request(req)) {
-			end_request(req, 0);
-			continue;
-		}
 
 		if (RING_FULL(&info->ring))
 			goto wait;
 
-		DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
-			"(%u/%li) buffer:%p [%s]\n",
-			req, req->cmd, (long long)req->sector,
-			req->current_nr_sectors,
-			req->nr_sectors, req->buffer,
-			rq_data_dir(req) ? "write" : "read");
+		blk_start_request(req);
 
+		if (!blk_fs_request(req)) {
+			__blk_end_request_all(req, -EIO);
+			continue;
+		}
+
+		DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
+			"(%u/%u) buffer:%p [%s]\n",
+			req, req->cmd, (long long)blk_rq_pos(req),
+			blk_rq_cur_sectors(req), blk_rq_sectors(req),
+			req->buffer, rq_data_dir(req) ? "write" : "read");
 
-		blkdev_dequeue_request(req);
 		if (blkif_queue_request(req)) {
 			blk_requeue_request(rq, req);
 		wait:
@@ -803,8 +803,7 @@ static irqreturn_t blkif_int(int irq, vo
 				DPRINTK("Bad return from blkdev data "
 					"request: %x\n", bret->status);
 
-			ret = __blk_end_request(req, ret, blk_rq_bytes(req));
-			BUG_ON(ret);
+			__blk_end_request_all(req, ret);
 			break;
 		default:
 			BUG();
@@ -930,7 +929,7 @@ static void blkif_recover(struct blkfron
 
 int blkfront_is_ready(struct xenbus_device *dev)
 {
-	struct blkfront_info *info = dev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 
 	return info->is_ready && info->xbdev;
 }
--- sle11sp1-2010-03-22.orig/drivers/xen/blkfront/vbd.c	2010-01-18 16:50:37.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/blkfront/vbd.c	2010-01-18 16:54:23.000000000 +0100
@@ -313,7 +313,7 @@ xlvbd_init_blk_queue(struct gendisk *gd,
 #endif
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_hardsect_size(rq, sector_size);
+	blk_queue_logical_block_size(rq, sector_size);
 	blk_queue_max_sectors(rq, 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
@@ -500,7 +500,7 @@ static ssize_t show_media(struct device 
 		                  struct device_attribute *attr, char *buf)
 {
 	struct xenbus_device *xendev = to_xenbus_device(dev);
-	struct blkfront_info *info = xendev->dev.driver_data;
+	struct blkfront_info *info = dev_get_drvdata(&xendev->dev);
 
 	if (info->gd->flags & GENHD_FL_CD)
 		return sprintf(buf, "cdrom\n");
--- sle11sp1-2010-03-22.orig/drivers/xen/blktap/blktap.c	2010-01-04 12:40:02.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/blktap/blktap.c	2010-01-04 12:41:47.000000000 +0100
@@ -566,7 +566,8 @@ void signal_tapdisk(int idx) 
 		return;
 
 	if (info->pid > 0) {
-		ptask = find_task_by_pid_ns(info->pid, info->pid_ns);
+		ptask = pid_task(find_pid_ns(info->pid, info->pid_ns),
+				 PIDTYPE_PID);
 		if (ptask)
 			info->status = CLEANSHUTDOWN;
 	}
--- sle11sp1-2010-03-22.orig/drivers/xen/blktap/xenbus.c	2010-01-27 14:35:03.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/blktap/xenbus.c	2010-01-27 14:48:30.000000000 +0100
@@ -128,7 +128,7 @@ static int blktap_name(blkif_t *blkif, c
 		if (!get_device(_dev))					\
 			return ret;					\
 		dev = to_xenbus_device(_dev);				\
-		if ((be = dev->dev.driver_data) != NULL)		\
+		if ((be = dev_get_drvdata(&dev->dev)) != NULL)		\
 			ret = sprintf(buf, format, ##args);		\
 		put_device(_dev);					\
 		return ret;						\
@@ -158,7 +158,7 @@ static struct attribute_group tapstat_gr
 int xentap_sysfs_addif(struct xenbus_device *dev)
 {
 	int err;
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	err = sysfs_create_group(&dev->dev.kobj, &tapstat_group);
 	if (!err)
 		be->group_added = 1;
@@ -167,14 +167,14 @@ int xentap_sysfs_addif(struct xenbus_dev
 
 void xentap_sysfs_delif(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	sysfs_remove_group(&dev->dev.kobj, &tapstat_group);
 	be->group_added = 0;
 }
 
 static int blktap_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	if (be->group_added)
 		xentap_sysfs_delif(be->dev);
@@ -192,7 +192,7 @@ static int blktap_remove(struct xenbus_d
 		be->blkif = NULL;
 	}
 	kfree(be);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
 }
 
@@ -258,7 +258,7 @@ static int blktap_probe(struct xenbus_de
 	}
 
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 	be->xenbus_id = get_id(dev->nodename);
 
 	be->blkif = tap_alloc_blkif(dev->otherend_id);
@@ -338,7 +338,7 @@ static void tap_backend_changed(struct x
 static void tap_frontend_changed(struct xenbus_device *dev,
 			     enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	int err;
 
 	DPRINTK("\n");
--- sle11sp1-2010-03-22.orig/drivers/xen/blktap2/device.c	2009-11-06 10:51:47.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/blktap2/device.c	2010-03-01 14:34:58.000000000 +0100
@@ -208,13 +208,6 @@ flush_tlb_kernel_page(unsigned long kvad
 #endif
 }
 
-static void
-blktap_device_end_dequeued_request(struct request *req, int ret)
-{
-	if (blk_end_request(req, ret, blk_rq_bytes(req)))
-		BUG();
-}
-
 /*
  * tap->tap_sem held on entry
  */
@@ -381,7 +374,7 @@ blktap_device_fail_pending_requests(stru
 
 		blktap_unmap(tap, request);
 		req = (struct request *)(unsigned long)request->id;
-		blktap_device_end_dequeued_request(req, -ENODEV);
+		blk_end_request_all(req, -ENODEV);
 		blktap_request_free(tap, request);
 	}
 
@@ -420,7 +413,7 @@ blktap_device_finish_request(struct blkt
 		if (unlikely(res->status != BLKIF_RSP_OKAY))
 			BTERR("Bad return from device data "
 				"request: %x\n", res->status);
-		blktap_device_end_dequeued_request(req,
+		blk_end_request_all(req,
 			res->status == BLKIF_RSP_OKAY ? 0 : -EIO);
 		break;
 	default:
@@ -651,7 +644,7 @@ blktap_device_process_request(struct blk
 	ring    = &tap->ring;
 	usr_idx = request->usr_idx;
 	blkif_req.id = usr_idx;
-	blkif_req.sector_number = (blkif_sector_t)req->sector;
+	blkif_req.sector_number = (blkif_sector_t)blk_rq_pos(req);
 	blkif_req.handle = 0;
 	blkif_req.operation = rq_data_dir(req) ?
 		BLKIF_OP_WRITE : BLKIF_OP_READ;
@@ -848,47 +841,46 @@ blktap_device_run_queue(struct blktap *t
 
 	BTDBG("running queue for %d\n", tap->minor);
 
-	while ((req = elv_next_request(rq)) != NULL) {
+	while ((req = blk_peek_request(rq)) != NULL) {
+		if (RING_FULL(&ring->ring)) {
+		wait:
+			/* Avoid pointless unplugs. */
+			blk_stop_queue(rq);
+			blktap_defer(tap);
+			break;
+		}
+
+		blk_start_request(req);
+
 		if (!blk_fs_request(req)) {
-			end_request(req, 0);
+			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 
 		if (blk_barrier_rq(req)) {
-			end_request(req, 0);
+			__blk_end_request_all(req, -EOPNOTSUPP);
 			continue;
 		}
 
 #ifdef ENABLE_PASSTHROUGH
 		if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
-			blkdev_dequeue_request(req);
 			blktap_device_forward_request(tap, req);
 			continue;
 		}
 #endif
 
-		if (RING_FULL(&ring->ring)) {
-		wait:
-			/* Avoid pointless unplugs. */
-			blk_stop_queue(rq);
-			blktap_defer(tap);
-			break;
-		}
-
 		request = blktap_request_allocate(tap);
 		if (!request) {
 			tap->stats.st_oo_req++;
 			goto wait;
 		}
 
-		BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
+		BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%x) "
 		      "buffer:%p [%s], pending: %p\n", req, tap->minor,
-		      req->cmd, (unsigned long long)req->sector,
-		      req->current_nr_sectors, req->nr_sectors, req->buffer,
+		      req->cmd, (unsigned long long)blk_rq_pos(req),
+		      blk_rq_cur_sectors(req), blk_rq_sectors(req), req->buffer,
 		      rq_data_dir(req) ? "write" : "read", request);
 
-		blkdev_dequeue_request(req);
-
 		spin_unlock_irq(&dev->lock);
 		down_read(&tap->tap_sem);
 
@@ -896,7 +888,7 @@ blktap_device_run_queue(struct blktap *t
 		if (!err)
 			queued++;
 		else {
-			blktap_device_end_dequeued_request(req, err);
+			blk_end_request_all(req, err);
 			blktap_request_free(tap, request);
 		}
 
@@ -936,11 +928,13 @@ blktap_device_do_request(struct request_
 	return;
 
 fail:
-	while ((req = elv_next_request(rq))) {
+	while ((req = blk_peek_request(rq))) {
 		BTERR("device closed: failing secs %llu - %llu\n",
-		      (unsigned long long)req->sector,
-		      (unsigned long long)req->sector + req->nr_sectors);
-		end_request(req, 0);
+		      (unsigned long long)blk_rq_pos(req),
+		      (unsigned long long)blk_rq_pos(req)
+		      + blk_rq_cur_sectors(req));
+		blk_start_request(req);
+		__blk_end_request_all(req, -EIO);
 	}
 }
 
@@ -995,7 +989,7 @@ blktap_device_configure(struct blktap *t
 	set_capacity(dev->gd, tap->params.capacity);
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
-	blk_queue_hardsect_size(rq, tap->params.sector_size);
+	blk_queue_logical_block_size(rq, tap->params.sector_size);
 	blk_queue_max_sectors(rq, 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
--- sle11sp1-2010-03-22.orig/drivers/xen/console/console.c	2009-11-06 10:51:42.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/console/console.c	2009-11-06 10:52:09.000000000 +0100
@@ -46,7 +46,6 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/console.h>
-#include <linux/bootmem.h>
 #include <linux/sysrq.h>
 #include <linux/screen_info.h>
 #include <linux/vt.h>
@@ -236,7 +235,7 @@ static int __init xen_console_init(void)
 		goto out;
 	}
 
-	wbuf = alloc_bootmem(wbuf_size);
+	wbuf = kmalloc(wbuf_size, GFP_KERNEL);
 
 	register_console(&kcons_info);
 
@@ -632,8 +631,8 @@ static void xencons_close(struct tty_str
 	tty->closing = 1;
 	tty_wait_until_sent(tty, 0);
 	tty_driver_flush_buffer(tty);
-	if (tty->ldisc.ops->flush_buffer != NULL)
-		tty->ldisc.ops->flush_buffer(tty);
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
 	tty->closing = 0;
 	spin_lock_irqsave(&xencons_lock, flags);
 	xencons_tty = NULL;
--- sle11sp1-2010-03-22.orig/drivers/xen/core/evtchn.c	2010-02-09 17:00:51.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/core/evtchn.c	2010-02-09 17:06:02.000000000 +0100
@@ -35,7 +35,6 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
-#include <linux/bootmem.h>
 #include <linux/ftrace.h>
 #include <linux/version.h>
 #include <asm/atomic.h>
@@ -350,7 +349,7 @@ static int find_unbound_irq(unsigned int
 	int irq;
 
 	for (irq = DYNIRQ_BASE; irq < (DYNIRQ_BASE + NR_DYNIRQS); irq++) {
-		struct irq_desc *desc = irq_to_desc_alloc_cpu(irq, cpu);
+		struct irq_desc *desc = irq_to_desc_alloc_node(irq, cpu_to_node(cpu));
 		struct irq_cfg *cfg = desc->chip_data;
 
 		if (!cfg->bindcount) {
@@ -703,9 +702,11 @@ static void rebind_irq_to_cpu(unsigned i
 		rebind_evtchn_to_cpu(evtchn, tcpu);
 }
 
-static void set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int set_affinity_irq(unsigned int irq, const struct cpumask *dest)
 {
 	rebind_irq_to_cpu(irq, cpumask_first(dest));
+
+	return 0;
 }
 #endif
 
@@ -1266,7 +1267,7 @@ int evtchn_map_pirq(int irq, int xen_pir
 
 			if (identity_mapped_irq(irq))
 				continue;
-			desc = irq_to_desc_alloc_cpu(irq, smp_processor_id());
+			desc = irq_to_desc_alloc_node(irq, numa_node_id());
 			cfg = desc->chip_data;
 			if (!index_from_irq(irq)) {
 				BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
@@ -1317,8 +1318,9 @@ void __init xen_init_IRQ(void)
 
 	init_evtchn_cpu_bindings();
 
-	pirq_needs_eoi = alloc_bootmem_pages(sizeof(unsigned long)
-		* BITS_TO_LONGS(ALIGN(nr_pirqs, PAGE_SIZE * 8)));
+	i = get_order(sizeof(unsigned long) * BITS_TO_LONGS(nr_pirqs));
+	pirq_needs_eoi = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, i);
+	BUILD_BUG_ON(NR_PIRQS > PAGE_SIZE * 8);
  	eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT;
 	if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0)
 		pirq_eoi_does_unmask = true;
--- sle11sp1-2010-03-22.orig/drivers/xen/core/smpboot.c	2010-03-22 12:25:29.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/core/smpboot.c	2010-03-22 12:25:59.000000000 +0100
@@ -43,9 +43,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
 static DEFINE_PER_CPU(int, resched_irq);
 static DEFINE_PER_CPU(int, callfunc_irq);
 static DEFINE_PER_CPU(int, call1func_irq);
+static DEFINE_PER_CPU(int, reboot_irq);
 static char resched_name[NR_CPUS][15];
 static char callfunc_name[NR_CPUS][15];
 static char call1func_name[NR_CPUS][15];
+static char reboot_name[NR_CPUS][15];
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #define set_cpu_to_apicid(cpu, apicid) (per_cpu(x86_cpu_to_apicid, cpu) = (apicid))
@@ -110,7 +112,7 @@ static int __cpuinit xen_smp_intr_init(u
 	int rc;
 
 	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) =
-		per_cpu(call1func_irq, cpu) = -1;
+		per_cpu(call1func_irq, cpu) = per_cpu(reboot_irq, cpu) = -1;
 
 	sprintf(resched_name[cpu], "resched%u", cpu);
 	rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
@@ -145,6 +147,17 @@ static int __cpuinit xen_smp_intr_init(u
 		goto fail;
 	per_cpu(call1func_irq, cpu) = rc;
 
+	sprintf(reboot_name[cpu], "reboot%u", cpu);
+	rc = bind_ipi_to_irqhandler(REBOOT_VECTOR,
+				    cpu,
+				    smp_reboot_interrupt,
+				    IRQF_DISABLED|IRQF_NOBALANCING,
+				    reboot_name[cpu],
+				    NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(reboot_irq, cpu) = rc;
+
 	rc = xen_spinlock_init(cpu);
 	if (rc < 0)
 		goto fail;
@@ -161,6 +174,8 @@ static int __cpuinit xen_smp_intr_init(u
 		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
 	if (per_cpu(call1func_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
+	if (per_cpu(reboot_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
 	xen_spinlock_cleanup(cpu);
 	return rc;
 }
@@ -174,6 +189,7 @@ static void __cpuexit xen_smp_intr_exit(
 	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
 	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
 	unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
 	xen_spinlock_cleanup(cpu);
 }
 #endif
--- sle11sp1-2010-03-22.orig/drivers/xen/fbfront/xenfb.c	2009-11-06 10:51:42.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/fbfront/xenfb.c	2009-11-06 10:52:09.000000000 +0100
@@ -597,7 +597,7 @@ static int __devinit xenfb_probe(struct 
 		fb_size = XENFB_DEFAULT_FB_LEN;
 	}
 
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 	info->xbdev = dev;
 	info->irq = -1;
 	info->x1 = info->y1 = INT_MAX;
@@ -701,7 +701,7 @@ static int __devinit xenfb_probe(struct 
 
 static int xenfb_resume(struct xenbus_device *dev)
 {
-	struct xenfb_info *info = dev->dev.driver_data;
+	struct xenfb_info *info = dev_get_drvdata(&dev->dev);
 
 	xenfb_disconnect_backend(info);
 	xenfb_init_shared_page(info, info->fb_info);
@@ -710,7 +710,7 @@ static int xenfb_resume(struct xenbus_de
 
 static int xenfb_remove(struct xenbus_device *dev)
 {
-	struct xenfb_info *info = dev->dev.driver_data;
+	struct xenfb_info *info = dev_get_drvdata(&dev->dev);
 
 	del_timer(&info->refresh);
 	if (info->kthread)
@@ -817,7 +817,7 @@ static void xenfb_disconnect_backend(str
 static void xenfb_backend_changed(struct xenbus_device *dev,
 				  enum xenbus_state backend_state)
 {
-	struct xenfb_info *info = dev->dev.driver_data;
+	struct xenfb_info *info = dev_get_drvdata(&dev->dev);
 	int val;
 
 	switch (backend_state) {
--- sle11sp1-2010-03-22.orig/drivers/xen/fbfront/xenkbd.c	2009-11-06 10:51:42.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/fbfront/xenkbd.c	2009-11-06 10:52:09.000000000 +0100
@@ -113,7 +113,7 @@ int __devinit xenkbd_probe(struct xenbus
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
 		return -ENOMEM;
 	}
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 	info->xbdev = dev;
 	snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
 
@@ -186,7 +186,7 @@ int __devinit xenkbd_probe(struct xenbus
 
 static int xenkbd_resume(struct xenbus_device *dev)
 {
-	struct xenkbd_info *info = dev->dev.driver_data;
+	struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
 
 	xenkbd_disconnect_backend(info);
 	info->page->in_cons = info->page->in_prod = 0;
@@ -196,7 +196,7 @@ static int xenkbd_resume(struct xenbus_d
 
 static int xenkbd_remove(struct xenbus_device *dev)
 {
-	struct xenkbd_info *info = dev->dev.driver_data;
+	struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
 
 	xenkbd_disconnect_backend(info);
 	input_unregister_device(info->kbd);
@@ -262,7 +262,7 @@ static void xenkbd_disconnect_backend(st
 static void xenkbd_backend_changed(struct xenbus_device *dev,
 				   enum xenbus_state backend_state)
 {
-	struct xenkbd_info *info = dev->dev.driver_data;
+	struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
 	int ret, val;
 
 	switch (backend_state) {
--- sle11sp1-2010-03-22.orig/drivers/xen/netback/accel.c	2009-11-06 10:45:48.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/netback/accel.c	2009-11-06 10:52:09.000000000 +0100
@@ -103,7 +103,7 @@ static int netback_accelerator_probe_bac
 	struct xenbus_device *xendev = to_xenbus_device(dev);
 
 	if (!strcmp("vif", xendev->devicetype)) {
-		struct backend_info *be = xendev->dev.driver_data;
+		struct backend_info *be = dev_get_drvdata(&xendev->dev);
 
 		if (match_accelerator(xendev, be, accelerator) &&
 		    try_module_get(accelerator->hooks->owner)) {
@@ -124,7 +124,7 @@ static int netback_accelerator_remove_ba
 		(struct netback_accelerator *)arg;
 	
 	if (!strcmp("vif", xendev->devicetype)) {
-		struct backend_info *be = xendev->dev.driver_data;
+		struct backend_info *be = dev_get_drvdata(&xendev->dev);
 
 		if (be->accelerator == accelerator) {
 			be->accelerator->hooks->remove(xendev);
--- sle11sp1-2010-03-22.orig/drivers/xen/netback/loopback.c	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/netback/loopback.c	2009-11-06 10:52:09.000000000 +0100
@@ -137,8 +137,8 @@ static int loopback_start_xmit(struct sk
 		return 0;
 	}
 
-	dst_release(skb->dst);
-	skb->dst = NULL;
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, NULL);
 
 	skb_orphan(skb);
 
--- sle11sp1-2010-03-22.orig/drivers/xen/netback/xenbus.c	2009-11-06 10:51:17.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/netback/xenbus.c	2009-11-06 10:52:09.000000000 +0100
@@ -35,7 +35,7 @@ static void backend_create_netif(struct 
 
 static int netback_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	netback_remove_accelerators(be, dev);
 
@@ -45,7 +45,7 @@ static int netback_remove(struct xenbus_
 		be->netif = NULL;
 	}
 	kfree(be);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
 }
 
@@ -70,7 +70,7 @@ static int netback_probe(struct xenbus_d
 	}
 
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 
 	sg = 1;
 	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
@@ -151,7 +151,7 @@ fail:
  */
 static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env)
 {
-	struct backend_info *be = xdev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&xdev->dev);
 	netif_t *netif = be->netif;
 	char *val;
 
@@ -207,7 +207,7 @@ static void backend_create_netif(struct 
 static void frontend_changed(struct xenbus_device *dev,
 			     enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%s", xenbus_strstate(frontend_state));
 
--- sle11sp1-2010-03-22.orig/drivers/xen/netfront/netfront.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/netfront/netfront.c	2009-11-06 10:52:09.000000000 +0100
@@ -257,7 +257,7 @@ static int __devinit netfront_probe(stru
 	}
 
 	info = netdev_priv(netdev);
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 
 	err = register_netdev(info->netdev);
 	if (err) {
@@ -278,13 +278,13 @@ static int __devinit netfront_probe(stru
 
  fail:
 	free_netdev(netdev);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return err;
 }
 
 static int __devexit netfront_remove(struct xenbus_device *dev)
 {
-	struct netfront_info *info = dev->dev.driver_data;
+	struct netfront_info *info = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%s\n", dev->nodename);
 
@@ -306,14 +306,14 @@ static int __devexit netfront_remove(str
 
 static int netfront_suspend(struct xenbus_device *dev)
 {
-	struct netfront_info *info = dev->dev.driver_data;
+	struct netfront_info *info = dev_get_drvdata(&dev->dev);
 	return netfront_accelerator_suspend(info, dev);
 }
 
 
 static int netfront_suspend_cancel(struct xenbus_device *dev)
 {
-	struct netfront_info *info = dev->dev.driver_data;
+	struct netfront_info *info = dev_get_drvdata(&dev->dev);
 	return netfront_accelerator_suspend_cancel(info, dev);
 }
 
@@ -326,7 +326,7 @@ static int netfront_suspend_cancel(struc
  */
 static int netfront_resume(struct xenbus_device *dev)
 {
-	struct netfront_info *info = dev->dev.driver_data;
+	struct netfront_info *info = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%s\n", dev->nodename);
 
@@ -531,7 +531,7 @@ static int setup_device(struct xenbus_de
 static void backend_changed(struct xenbus_device *dev,
 			    enum xenbus_state backend_state)
 {
-	struct netfront_info *np = dev->dev.driver_data;
+	struct netfront_info *np = dev_get_drvdata(&dev->dev);
 	struct net_device *netdev = np->netdev;
 
 	DPRINTK("%s\n", xenbus_strstate(backend_state));
--- sle11sp1-2010-03-22.orig/drivers/xen/pciback/xenbus.c	2009-11-06 10:49:47.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/pciback/xenbus.c	2009-11-06 10:52:09.000000000 +0100
@@ -24,7 +24,7 @@ static struct pciback_device *alloc_pdev
 	dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
 
 	pdev->xdev = xdev;
-	xdev->dev.driver_data = pdev;
+	dev_set_drvdata(&xdev->dev, pdev);
 
 	spin_lock_init(&pdev->dev_lock);
 
@@ -74,7 +74,7 @@ static void free_pdev(struct pciback_dev
 
 	pciback_release_devices(pdev);
 
-	pdev->xdev->dev.driver_data = NULL;
+	dev_set_drvdata(&pdev->xdev->dev, NULL);
 	pdev->xdev = NULL;
 
 	kfree(pdev);
@@ -475,7 +475,7 @@ static int pciback_reconfigure(struct pc
 static void pciback_frontend_changed(struct xenbus_device *xdev,
 				     enum xenbus_state fe_state)
 {
-	struct pciback_device *pdev = xdev->dev.driver_data;
+	struct pciback_device *pdev = dev_get_drvdata(&xdev->dev);
 
 	dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
 
@@ -668,7 +668,7 @@ static int pciback_xenbus_probe(struct x
 
 static int pciback_xenbus_remove(struct xenbus_device *dev)
 {
-	struct pciback_device *pdev = dev->dev.driver_data;
+	struct pciback_device *pdev = dev_get_drvdata(&dev->dev);
 
 	if (pdev != NULL)
 		free_pdev(pdev);
--- sle11sp1-2010-03-22.orig/drivers/xen/pcifront/pci_op.c	2009-11-06 10:46:27.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/pcifront/pci_op.c	2009-11-06 10:52:09.000000000 +0100
@@ -416,7 +416,7 @@ void pci_frontend_disable_msi(struct pci
 #endif /* CONFIG_PCI_MSI */
 
 /* Claim resources for the PCI frontend as-is, backend won't allow changes */
-static void pcifront_claim_resource(struct pci_dev *dev, void *data)
+static int pcifront_claim_resource(struct pci_dev *dev, void *data)
 {
 	struct pcifront_device *pdev = data;
 	int i;
@@ -431,6 +431,8 @@ static void pcifront_claim_resource(stru
 			pci_claim_resource(dev, i);
 		}
 	}
+
+	return 0;
 }
 
 int __devinit pcifront_scan_root(struct pcifront_device *pdev,
--- sle11sp1-2010-03-22.orig/drivers/xen/pcifront/xenbus.c	2009-11-06 10:49:47.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/pcifront/xenbus.c	2009-11-06 10:52:09.000000000 +0100
@@ -34,7 +34,7 @@ static struct pcifront_device *alloc_pde
 	/*Flag for registering PV AER handler*/
 	set_bit(_XEN_PCIB_AERHANDLER, (void*)&pdev->sh_info->flags);
 
-	xdev->dev.driver_data = pdev;
+	dev_set_drvdata(&xdev->dev, pdev);
 	pdev->xdev = xdev;
 
 	INIT_LIST_HEAD(&pdev->root_buses);
@@ -70,7 +70,7 @@ static void free_pdev(struct pcifront_de
 		gnttab_end_foreign_access(pdev->gnt_ref,
 					  (unsigned long)pdev->sh_info);
 
-	pdev->xdev->dev.driver_data = NULL;
+	dev_set_drvdata(&pdev->xdev->dev, NULL);
 
 	kfree(pdev);
 }
@@ -381,7 +381,7 @@ static int pcifront_detach_devices(struc
 static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
 						  enum xenbus_state be_state)
 {
-	struct pcifront_device *pdev = xdev->dev.driver_data;
+	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
 
 	switch (be_state) {
 	case XenbusStateUnknown:
@@ -431,8 +431,8 @@ static int pcifront_xenbus_probe(struct 
 
 static int pcifront_xenbus_remove(struct xenbus_device *xdev)
 {
-	if (xdev->dev.driver_data)
-		free_pdev(xdev->dev.driver_data);
+	if (dev_get_drvdata(&xdev->dev))
+		free_pdev(dev_get_drvdata(&xdev->dev));
 
 	return 0;
 }
--- sle11sp1-2010-03-22.orig/drivers/xen/scsiback/scsiback.c	2010-01-04 12:35:40.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/scsiback/scsiback.c	2010-01-04 12:41:57.000000000 +0100
@@ -224,7 +224,7 @@ static void scsiback_cmd_done(struct req
 	int errors;
 
 	sense_buffer = req->sense;
-	resid        = req->data_len;
+	resid        = blk_rq_bytes(req);
 	errors       = req->errors;
 
 	if (errors != 0) {
@@ -339,21 +339,6 @@ fail_flush:
 	return -ENOMEM;
 }
 
-/* quoted scsi_lib.c/scsi_merge_bio */
-static int scsiback_merge_bio(struct request *rq, struct bio *bio)
-{
-	struct request_queue *q = rq->q;
-
-	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
-	if (rq_data_dir(rq) == WRITE)
-		bio->bi_rw |= (1 << BIO_RW);
-
-	blk_queue_bounce(q, &bio);
-
-	return blk_rq_append_bio(q, rq, bio);
-}
-
-
 /* quoted scsi_lib.c/scsi_bi_endio */
 static void scsiback_bi_endio(struct bio *bio, int error)
 {
@@ -363,29 +348,28 @@ static void scsiback_bi_endio(struct bio
 
 
 /* quoted scsi_lib.c/scsi_req_map_sg . */
-static int request_map_sg(struct request *rq, pending_req_t *pending_req, unsigned int count)
+static struct bio *request_map_sg(pending_req_t *pending_req)
 {
-	struct request_queue *q = rq->q;
-	int nr_pages;
-	unsigned int nsegs = count;
-	unsigned int data_len = 0, len, bytes, off;
+	struct request_queue *q = pending_req->sdev->request_queue;
+	unsigned int nsegs = (unsigned int)pending_req->nr_segments;
+	unsigned int i, len, bytes, off, nr_pages, nr_vecs = 0;
 	struct scatterlist *sg;
 	struct page *page;
-	struct bio *bio = NULL;
-	int i, err, nr_vecs = 0;
+	struct bio *bio = NULL, *bio_first = NULL, *bio_last = NULL;
+	int err;
 
 	for_each_sg (pending_req->sgl, sg, nsegs, i) {
 		page = sg_page(sg);
 		off = sg->offset;
 		len = sg->length;
-		data_len += len;
 
 		nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		while (len > 0) {
 			bytes = min_t(unsigned int, len, PAGE_SIZE - off);
 
 			if (!bio) {
-				nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
+				nr_vecs = min_t(unsigned int, BIO_MAX_PAGES,
+					 	nr_pages);
 				nr_pages -= nr_vecs;
 				bio = bio_alloc(GFP_KERNEL, nr_vecs);
 				if (!bio) {
@@ -393,6 +377,11 @@ static int request_map_sg(struct request
 					goto free_bios;
 				}
 				bio->bi_end_io = scsiback_bi_endio;
+				if (bio_last)
+					bio_last->bi_next = bio;
+				else
+					bio_first = bio;
+				bio_last = bio;
 			}
 
 			if (bio_add_pc_page(q, bio, page, bytes, off) !=
@@ -403,11 +392,9 @@ static int request_map_sg(struct request
 			}
 
 			if (bio->bi_vcnt >= nr_vecs) {
-				err = scsiback_merge_bio(rq, bio);
-				if (err) {
-					bio_endio(bio, 0);
-					goto free_bios;
-				}
+				bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+				if (pending_req->sc_data_direction == WRITE)
+					bio->bi_rw |= (1 << BIO_RW);
 				bio = NULL;
 			}
 
@@ -417,21 +404,15 @@ static int request_map_sg(struct request
 		}
 	}
 
-	rq->buffer   = rq->data = NULL;
-	rq->data_len = data_len;
-
-	return 0;
+	return bio_first;
 
 free_bios:
-	while ((bio = rq->bio) != NULL) {
-		rq->bio = bio->bi_next;
-		/*
-		 * call endio instead of bio_put incase it was bounced
-		 */
-		bio_endio(bio, 0);
+	while ((bio = bio_first) != NULL) {
+		bio_first = bio->bi_next;
+		bio_put(bio);
 	}
 
-	return err;
+	return ERR_PTR(err);
 }
 
 
@@ -439,7 +420,6 @@ void scsiback_cmd_exec(pending_req_t *pe
 {
 	int cmd_len  = (int)pending_req->cmd_len;
 	int data_dir = (int)pending_req->sc_data_direction;
-	unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
 	unsigned int timeout;
 	struct request *rq;
 	int write;
@@ -453,7 +433,30 @@ void scsiback_cmd_exec(pending_req_t *pe
 		timeout = VSCSIIF_TIMEOUT;
 
 	write = (data_dir == DMA_TO_DEVICE);
-	rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
+	if (pending_req->nr_segments) {
+		struct bio *bio = request_map_sg(pending_req);
+
+		if (IS_ERR(bio)) {
+			printk(KERN_ERR "scsiback: SG Request Map Error\n");
+			return;
+		}
+
+		rq = blk_make_request(pending_req->sdev->request_queue, bio,
+				      GFP_KERNEL);
+		if (IS_ERR(rq)) {
+			printk(KERN_ERR "scsiback: Make Request Error\n");
+			return;
+		}
+
+		rq->buffer = NULL;
+	} else {
+		rq = blk_get_request(pending_req->sdev->request_queue, write,
+				     GFP_KERNEL);
+		if (unlikely(!rq)) {
+			printk(KERN_ERR "scsiback: Get Request Error\n");
+			return;
+		}
+	}
 
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_len = cmd_len;
@@ -468,14 +471,6 @@ void scsiback_cmd_exec(pending_req_t *pe
 	rq->timeout   = timeout;
 	rq->end_io_data = pending_req;
 
-	if (nr_segments) {
-
-		if (request_map_sg(rq, pending_req, nr_segments)) {
-			printk(KERN_ERR "scsiback: SG Request Map Error\n");
-			return;
-		}
-	}
-
 	scsiback_get(pending_req->info);
 	blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsiback_cmd_done);
 
--- sle11sp1-2010-03-22.orig/drivers/xen/scsiback/xenbus.c	2009-11-06 10:46:41.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/scsiback/xenbus.c	2009-11-06 10:52:09.000000000 +0100
@@ -224,7 +224,7 @@ static void scsiback_do_lun_hotplug(stru
 static void scsiback_frontend_changed(struct xenbus_device *dev,
 					enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	int err;
 
 	switch (frontend_state) {
@@ -281,7 +281,7 @@ static void scsiback_frontend_changed(st
 
 static int scsiback_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	if (be->info) {
 		scsiback_disconnect(be->info);
@@ -291,7 +291,7 @@ static int scsiback_remove(struct xenbus
 	}
 
 	kfree(be);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 
 	return 0;
 }
@@ -314,7 +314,7 @@ static int scsiback_probe(struct xenbus_
 		return -ENOMEM;
 	}
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 
 	be->info = vscsibk_info_alloc(dev->otherend_id);
 	if (IS_ERR(be->info)) {
--- sle11sp1-2010-03-22.orig/drivers/xen/scsifront/xenbus.c	2009-11-06 10:49:47.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/scsifront/xenbus.c	2009-11-06 10:52:09.000000000 +0100
@@ -185,7 +185,7 @@ static int scsifront_probe(struct xenbus
 	info->host = host;
 
 
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 	info->dev  = dev;
 
 	for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
@@ -238,7 +238,7 @@ free_sring:
 
 static int scsifront_remove(struct xenbus_device *dev)
 {
-	struct vscsifrnt_info *info = dev->dev.driver_data;
+	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%s: %s removed\n",__FUNCTION__ ,dev->nodename);
 
@@ -350,7 +350,7 @@ static void scsifront_do_lun_hotplug(str
 static void scsifront_backend_changed(struct xenbus_device *dev,
 				enum xenbus_state backend_state)
 {
-	struct vscsifrnt_info *info = dev->dev.driver_data;
+	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
 
 	DPRINTK("%p %u %u\n", dev, dev->state, backend_state);
 
--- sle11sp1-2010-03-22.orig/drivers/xen/sfc_netback/accel_xenbus.c	2010-01-04 12:23:14.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/sfc_netback/accel_xenbus.c	2010-01-04 12:42:06.000000000 +0100
@@ -36,7 +36,7 @@
 #define NODENAME_PATH_FMT "backend/vif/%d/%d"
 
 #define NETBACK_ACCEL_FROM_XENBUS_DEVICE(_dev) (struct netback_accel *) \
-	((struct backend_info *)(_dev)->dev.driver_data)->netback_accel_priv
+	((struct backend_info *)dev_get_drvdata(&(_dev)->dev))->netback_accel_priv
 
 /* List of all the bends currently in existence. */
 struct netback_accel *bend_list = NULL;
@@ -615,7 +615,7 @@ int netback_accel_probe(struct xenbus_de
 	mutex_lock(&bend->bend_mutex);
 
 	/* ...and store it where we can get at it */
-	binfo = (struct backend_info *) dev->dev.driver_data;
+	binfo = dev_get_drvdata(&dev->dev);
 	binfo->netback_accel_priv = bend;
 	/* And vice-versa */
 	bend->hdev_data = dev;
@@ -729,7 +729,7 @@ int netback_accel_remove(struct xenbus_d
 	struct netback_accel *bend; 
 	int frontend_state;
 
-	binfo = (struct backend_info *) dev->dev.driver_data;
+	binfo = dev_get_drvdata(&dev->dev);
 	bend = (struct netback_accel *) binfo->netback_accel_priv;
 
 	DPRINTK("%s: dev %p bend %p\n", __FUNCTION__, dev, bend);
--- sle11sp1-2010-03-22.orig/drivers/xen/sfc_netfront/accel_xenbus.c	2009-11-06 10:49:47.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/sfc_netfront/accel_xenbus.c	2009-11-06 10:52:09.000000000 +0100
@@ -727,8 +727,7 @@ int netfront_accel_probe(struct net_devi
 
 int netfront_accel_remove(struct xenbus_device *dev)
 {
-	struct netfront_info *np =
-		(struct netfront_info *)dev->dev.driver_data;
+	struct netfront_info *np = dev_get_drvdata(&dev->dev);
 	netfront_accel_vnic *vnic = (netfront_accel_vnic *)np->accel_priv;
 
 	DPRINTK("%s %s\n", __FUNCTION__, dev->nodename);
--- sle11sp1-2010-03-22.orig/drivers/xen/sys-hypervisor.c	2010-03-22 12:07:54.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/sys-hypervisor.c	2009-11-06 10:52:09.000000000 +0100
@@ -18,6 +18,8 @@
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>
 
+#include "xenbus/xenbus_comms.h"
+
 #define HYPERVISOR_ATTR_RO(_name) \
 static struct hyp_sysfs_attr  _name##_attr = __ATTR_RO(_name)
 
@@ -116,9 +118,8 @@ static ssize_t uuid_show(struct hyp_sysf
 {
 	char *vm, *val;
 	int ret;
-	extern int xenstored_ready;
 
-	if (!xenstored_ready)
+	if (!is_xenstored_ready())
 		return -EBUSY;
 
 	vm = xenbus_read(XBT_NIL, "vm", "", NULL);
--- sle11sp1-2010-03-22.orig/drivers/xen/tpmback/xenbus.c	2009-11-06 10:46:41.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/tpmback/xenbus.c	2009-11-06 10:52:09.000000000 +0100
@@ -54,7 +54,7 @@ long int tpmback_get_instance(struct bac
 
 static int tpmback_remove(struct xenbus_device *dev)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
 	if (!be) return 0;
 
@@ -70,7 +70,7 @@ static int tpmback_remove(struct xenbus_
 		be->tpmif = NULL;
 	}
 	kfree(be);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return 0;
 }
 
@@ -89,7 +89,7 @@ static int tpmback_probe(struct xenbus_d
 
 	be->is_instance_set = 0;
 	be->dev = dev;
-	dev->dev.driver_data = be;
+	dev_set_drvdata(&dev->dev, be);
 
 	err = xenbus_watch_path2(dev, dev->nodename,
 				 "instance", &be->backend_watch,
@@ -139,7 +139,7 @@ static void backend_changed(struct xenbu
 static void frontend_changed(struct xenbus_device *dev,
 			     enum xenbus_state frontend_state)
 {
-	struct backend_info *be = dev->dev.driver_data;
+	struct backend_info *be = dev_get_drvdata(&dev->dev);
 	int err;
 
 	switch (frontend_state) {
--- sle11sp1-2010-03-22.orig/drivers/xen/usbback/usbback.h	2009-11-06 10:45:48.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/usbback/usbback.h	2009-11-06 10:52:09.000000000 +0100
@@ -64,6 +64,12 @@
 
 struct usbstub;
 
+#ifndef BUS_ID_SIZE
+#define USBBACK_BUS_ID_SIZE 20
+#else
+#define USBBACK_BUS_ID_SIZE BUS_ID_SIZE
+#endif
+
 #define USB_DEV_ADDR_SIZE 128
 
 typedef struct usbif_st {
@@ -111,7 +117,7 @@ typedef struct usbif_st {
 struct vusb_port_id {
 	struct list_head id_list;
 
-	char phys_bus[BUS_ID_SIZE];
+	char phys_bus[USBBACK_BUS_ID_SIZE];
 	domid_t domid;
 	unsigned int handle;
 	int portnum;
--- sle11sp1-2010-03-22.orig/drivers/xen/usbback/usbstub.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/usbback/usbstub.c	2009-11-06 10:52:09.000000000 +0100
@@ -56,7 +56,7 @@ struct vusb_port_id *find_portid_by_busi
 
 	spin_lock_irqsave(&port_list_lock, flags);
 	list_for_each_entry(portid, &port_list, id_list) {
-		if (!(strncmp(portid->phys_bus, busid, BUS_ID_SIZE))) {
+		if (!(strncmp(portid->phys_bus, busid, USBBACK_BUS_ID_SIZE))) {
 			found = 1;
 			break;
 		}
@@ -110,7 +110,7 @@ int portid_add(const char *busid,
 	portid->handle = handle;
 	portid->portnum = portnum;
 
-	strncpy(portid->phys_bus, busid, BUS_ID_SIZE);
+	strncpy(portid->phys_bus, busid, USBBACK_BUS_ID_SIZE);
 
 	spin_lock_irqsave(&port_list_lock, flags);
 	list_add(&portid->id_list, &port_list);
@@ -228,7 +228,7 @@ static int usbstub_probe(struct usb_inte
 		usbbk_hotplug_notify(usbif, portid->portnum, udev->speed);
 	} else {
 		/* maybe already called and connected by other intf */
-		if (strncmp(stub->portid->phys_bus, busid, BUS_ID_SIZE))
+		if (strncmp(stub->portid->phys_bus, busid, USBBACK_BUS_ID_SIZE))
 			goto out; /* invalid call */
 	}
 
--- sle11sp1-2010-03-22.orig/drivers/xen/usbback/xenbus.c	2009-11-06 10:49:41.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/usbback/xenbus.c	2009-11-06 10:52:09.000000000 +0100
@@ -112,7 +112,7 @@ again:
 		 */
 		portid = find_portid(usbif->domid, usbif->handle, i);
 		if (portid) {
-			if ((strncmp(portid->phys_bus, busid, BUS_ID_SIZE)))
+			if ((strncmp(portid->phys_bus, busid, USBBACK_BUS_ID_SIZE)))
 				xenbus_dev_fatal(dev, err,
 					"can't add port/%d, remove first", i);
 			else
@@ -142,7 +142,7 @@ abort:
 
 static int usbback_remove(struct xenbus_device *dev)
 {
-	usbif_t *usbif = dev->dev.driver_data;
+	usbif_t *usbif = dev_get_drvdata(&dev->dev);
 	int i;
 
 	if (usbif->backend_watch.node) {
@@ -158,7 +158,7 @@ static int usbback_remove(struct xenbus_
 		usbif_disconnect(usbif);
 		usbif_free(usbif);;
 	}
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 
 	return 0;
 }
@@ -182,7 +182,7 @@ static int usbback_probe(struct xenbus_d
 		return -ENOMEM;
 	}
 	usbif->xbdev = dev;
-	dev->dev.driver_data = usbif;
+	dev_set_drvdata(&dev->dev, usbif);
 
 	err = xenbus_scanf(XBT_NIL, dev->nodename,
 				"num-ports", "%d", &num_ports);
@@ -259,7 +259,7 @@ static int connect_rings(usbif_t *usbif)
 static void frontend_changed(struct xenbus_device *dev,
 				     enum xenbus_state frontend_state)
 {
-	usbif_t *usbif = dev->dev.driver_data;
+	usbif_t *usbif = dev_get_drvdata(&dev->dev);
 	int err;
 
 	switch (frontend_state) {
--- sle11sp1-2010-03-22.orig/drivers/xen/usbfront/xenbus.c	2010-03-22 12:25:37.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/usbfront/xenbus.c	2010-03-22 12:25:54.000000000 +0100
@@ -187,7 +187,7 @@ out:
 
 static int connect(struct xenbus_device *dev)
 {
-	struct usbfront_info *info = dev->dev.driver_data;
+	struct usbfront_info *info = dev_get_drvdata(&dev->dev);
 
 	usbif_conn_request_t *req;
 	int i, idx, err;
@@ -299,7 +299,7 @@ static int usbfront_probe(struct xenbus_
 	}
 
 	info = hcd_to_info(hcd);
-	dev->dev.driver_data = info;
+	dev_set_drvdata(&dev->dev, info);
 
 	err = usb_add_hcd(hcd, 0, 0);
 	if (err != 0) {
@@ -314,13 +314,13 @@ static int usbfront_probe(struct xenbus_
 
 fail:
 	usb_put_hcd(hcd);
-	dev->dev.driver_data = NULL;
+	dev_set_drvdata(&dev->dev, NULL);
 	return err;
 }
 
 static void usbfront_disconnect(struct xenbus_device *dev)
 {
-	struct usbfront_info *info = dev->dev.driver_data;
+	struct usbfront_info *info = dev_get_drvdata(&dev->dev);
 	struct usb_hcd *hcd = info_to_hcd(info);
 
 	usb_remove_hcd(hcd);
@@ -364,7 +364,7 @@ static void backend_changed(struct xenbu
 
 static int usbfront_remove(struct xenbus_device *dev)
 {
-	struct usbfront_info *info = dev->dev.driver_data;
+	struct usbfront_info *info = dev_get_drvdata(&dev->dev);
 	struct usb_hcd *hcd = info_to_hcd(info);
 
 	destroy_rings(info);
--- sle11sp1-2010-03-22.orig/drivers/xen/xenbus/xenbus_probe.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/xenbus/xenbus_probe.c	2009-12-04 11:30:36.000000000 +0100
@@ -91,6 +91,11 @@ static int xenbus_probe_frontend(const c
 
 static void xenbus_dev_shutdown(struct device *_dev);
 
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+static int xenbus_dev_resume(struct device *dev);
+#endif
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -227,6 +232,10 @@ static struct xen_bus_type xenbus_fronte
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
 		.dev_attrs = xenbus_dev_attrs,
 #endif
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+		.suspend   = xenbus_dev_suspend,
+		.resume    = xenbus_dev_resume,
+#endif
 	},
 #if defined(CONFIG_XEN) || defined(MODULE)
 	.dev = {
@@ -763,6 +772,9 @@ void xenbus_dev_changed(const char *node
 
 	kfree(root);
 }
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+EXPORT_SYMBOL_GPL(xenbus_dev_changed);
+#endif
 
 static void frontend_changed(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
@@ -778,7 +790,11 @@ static struct xenbus_watch fe_watch = {
 	.callback = frontend_changed,
 };
 
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
+#else
 static int suspend_dev(struct device *dev, void *data)
+#endif
 {
 	int err = 0;
 	struct xenbus_driver *drv;
@@ -791,13 +807,18 @@ static int suspend_dev(struct device *de
 	drv = to_xenbus_driver(dev->driver);
 	xdev = container_of(dev, struct xenbus_device, dev);
 	if (drv->suspend)
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+		err = drv->suspend(xdev, state);
+#else
 		err = drv->suspend(xdev);
+#endif
 	if (err)
 		printk(KERN_WARNING
 		       "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
 	return 0;
 }
 
+#if defined(CONFIG_XEN) || defined(MODULE)
 static int suspend_cancel_dev(struct device *dev, void *data)
 {
 	int err = 0;
@@ -818,8 +839,13 @@ static int suspend_cancel_dev(struct dev
 		       dev_name(dev), err);
 	return 0;
 }
+#endif
 
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static int xenbus_dev_resume(struct device *dev)
+#else
 static int resume_dev(struct device *dev, void *data)
+#endif
 {
 	int err;
 	struct xenbus_driver *drv;
@@ -864,6 +890,7 @@ static int resume_dev(struct device *dev
 	return 0;
 }
 
+#if defined(CONFIG_XEN) || defined(MODULE)
 void xenbus_suspend(void)
 {
 	DPRINTK("");
@@ -893,6 +920,7 @@ void xenbus_suspend_cancel(void)
 	xenbus_backend_resume(suspend_cancel_dev);
 }
 EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
+#endif
 
 /* A flag to determine if xenstored is 'ready' (i.e. has started) */
 atomic_t xenbus_xsd_state = ATOMIC_INIT(XENBUS_XSD_UNCOMMITTED);
--- sle11sp1-2010-03-22.orig/drivers/xen/xenbus/xenbus_probe_backend.c	2009-11-06 10:52:02.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/xenbus/xenbus_probe_backend.c	2009-11-06 10:52:09.000000000 +0100
@@ -73,7 +73,7 @@ static int read_frontend_details(struct 
 }
 
 /* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
-static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
 {
 	int domid, err;
 	const char *devid, *type, *frontend;
@@ -103,8 +103,8 @@ static int backend_bus_id(char bus_id[BU
 	if (err)
 		return err;
 
-	if (snprintf(bus_id, BUS_ID_SIZE,
-		     "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
+	if (snprintf(bus_id, XEN_BUS_ID_SIZE, "%.*s-%i-%s",
+		     typelen, type, domid, devid) >= XEN_BUS_ID_SIZE)
 		return -ENOSPC;
 	return 0;
 }
--- sle11sp1-2010-03-22.orig/drivers/xen/xenbus/xenbus_xs.c	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-22/drivers/xen/xenbus/xenbus_xs.c	2009-11-06 10:52:09.000000000 +0100
@@ -723,6 +723,10 @@ void xs_resume(void)
 	struct xenbus_watch *watch;
 	char token[sizeof(watch) * 2 + 1];
 
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+	xb_init_comms();
+#endif
+
 	mutex_unlock(&xs_state.response_mutex);
 	mutex_unlock(&xs_state.request_mutex);
 	transaction_resume();
--- sle11sp1-2010-03-22.orig/include/Kbuild	2009-12-18 10:50:15.000000000 +0100
+++ sle11sp1-2010-03-22/include/Kbuild	2009-12-18 10:51:55.000000000 +0100
@@ -8,6 +8,5 @@ header-y += mtd/
 header-y += rdma/
 header-y += video/
 header-y += drm/
-header-y += xen/public/
 header-y += xen/
 header-y += scsi/
--- sle11sp1-2010-03-22.orig/include/xen/Kbuild	2010-03-22 12:07:54.000000000 +0100
+++ sle11sp1-2010-03-22/include/xen/Kbuild	2009-12-18 10:10:56.000000000 +0100
@@ -1 +1 @@
-header-y += evtchn.h
+header-y += public/
--- sle11sp1-2010-03-22.orig/include/xen/xenbus.h	2009-11-06 10:51:55.000000000 +0100
+++ sle11sp1-2010-03-22/include/xen/xenbus.h	2009-11-06 10:52:09.000000000 +0100
@@ -104,8 +104,12 @@ struct xenbus_driver {
 	void (*otherend_changed)(struct xenbus_device *dev,
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+	int (*suspend)(struct xenbus_device *dev, pm_message_t state);
+#else
 	int (*suspend)(struct xenbus_device *dev);
 	int (*suspend_cancel)(struct xenbus_device *dev);
+#endif
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *);
 	struct device_driver driver;
--- sle11sp1-2010-03-22.orig/lib/swiotlb-xen.c	2009-12-14 17:28:34.000000000 +0100
+++ sle11sp1-2010-03-22/lib/swiotlb-xen.c	2009-12-14 17:30:30.000000000 +0100
@@ -39,8 +39,8 @@ int swiotlb;
 int swiotlb_force;
 
 /*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in unmap_single and
+ * sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
 static char *io_tlb_start, *io_tlb_end;
@@ -159,7 +159,7 @@ dma_addr_t swiotlb_phys_to_bus(struct de
 	return phys_to_machine(paddr);
 }
 
-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
 	return machine_to_phys(baddr);
 }
@@ -170,9 +170,15 @@ static dma_addr_t swiotlb_virt_to_bus(st
 	return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
 }
 
-static void *swiotlb_bus_to_virt(dma_addr_t address)
+void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
 {
-	return phys_to_virt(swiotlb_bus_to_phys(address));
+	return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
+}
+
+int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
+					       dma_addr_t addr, size_t size)
+{
+	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
 int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -307,7 +313,7 @@ static void swiotlb_bounce(phys_addr_t p
 		unsigned long flags;
 
 		while (size) {
-			sz = min((size_t)(PAGE_SIZE - offset), size);
+			sz = min_t(size_t, PAGE_SIZE - offset, size);
 
 			local_irq_save(flags);
 			buffer = kmap_atomic(pfn_to_page(pfn),
@@ -441,7 +447,7 @@ found:
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
 static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 {
 	unsigned long flags;
 	int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -521,7 +527,7 @@ swiotlb_full(struct device *dev, size_t 
  * PCI address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
  */
 dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 			    unsigned long offset, size_t size,
@@ -535,7 +541,7 @@ dma_addr_t swiotlb_map_page(struct devic
 	BUG_ON(dir == DMA_NONE);
 
 	/*
-	 * If the pointer passed in happens to be in the device's DMA window,
+	 * If the address happens to be in the device's DMA window,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
@@ -560,23 +566,32 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 
 /*
  * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call.  All
+ * match what was provided for in a previous swiotlb_map_page call.  All
  * other usages are undefined.
  *
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+			 size_t size, int dir)
+{
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+
+	BUG_ON(dir == DMA_NONE);
+
+	if (is_swiotlb_buffer(dev_addr)) {
+		do_unmap_single(hwdev, dma_addr, size, dir);
+		return;
+	}
+
+	gnttab_dma_unmap_page(dev_addr);
+}
+
 void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
 			size_t size, enum dma_data_direction dir,
 			struct dma_attrs *attrs)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dev_addr))
-		unmap_single(hwdev, dma_addr, size, dir);
-	else
-		gnttab_dma_unmap_page(dev_addr);
+	unmap_single(hwdev, dev_addr, size, dir);
 }
 EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
 
@@ -584,7 +599,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
  * Make physical memory consistent for a single streaming mode DMA translation
  * after a transfer.
  *
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * If you perform a swiotlb_map_page() but wish to interrogate the buffer
  * using the cpu, yet do not wish to teardown the PCI dma mapping, you must
  * call this function before doing so.  At the next point you give the PCI dma
  * address back to the card, you must first perform a
@@ -594,9 +609,10 @@ void
 swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
 			    size_t size, enum dma_data_direction dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
+
 	if (is_swiotlb_buffer(dev_addr))
 		sync_single(hwdev, dma_addr, size, dir);
 }
@@ -606,9 +622,10 @@ void
 swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
 			       size_t size, enum dma_data_direction dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+	char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
+
 	if (is_swiotlb_buffer(dev_addr))
 		sync_single(hwdev, dma_addr, size, dir);
 }
@@ -619,11 +636,7 @@ swiotlb_sync_single_range_for_cpu(struct
 				  unsigned long offset, size_t size,
 				  enum dma_data_direction dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dev_addr))
-		sync_single(hwdev, dma_addr + offset, size, dir);
+	swiotlb_sync_single_for_cpu(hwdev, dev_addr + offset, size, dir);
 }
 EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
 
@@ -632,17 +645,13 @@ swiotlb_sync_single_range_for_device(str
 				     unsigned long offset, size_t size,
 				     enum dma_data_direction dir)
 {
-	char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
-	BUG_ON(dir == DMA_NONE);
-	if (is_swiotlb_buffer(dev_addr))
-		sync_single(hwdev, dma_addr + offset, size, dir);
+	swiotlb_sync_single_for_device(hwdev, dev_addr + offset, size, dir);
 }
 EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
 
 /*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
+ * This is the scatter-gather version of the above swiotlb_map_page
  * interface.  Here the scatter gather list elements are each tagged with the
  * appropriate dma address and length.  They are obtained via
  * sg_dma_{address,length}(SG).
@@ -653,7 +662,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra
  *       The routine returns the number of addr/length pairs actually
  *       used, at most nents.
  *
- * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * Device ownership issues as mentioned above for swiotlb_map_page are the
  * same here.
  */
 int
@@ -706,7 +715,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
 
 /*
  * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
+ * concerning calls here are the same as for swiotlb_unmap_page() above.
  */
 void
 swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
@@ -717,13 +726,9 @@ swiotlb_unmap_sg_attrs(struct device *hw
 
 	BUG_ON(dir == DMA_NONE);
 
-	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != sg_phys(sg))
-			unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
-				     sg->dma_length, dir);
-		else
-			gnttab_dma_unmap_page(sg->dma_address);
-	}
+	for_each_sg(sgl, sg, nelems, i)
+		unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 
@@ -749,13 +754,9 @@ swiotlb_sync_sg_for_cpu(struct device *h
 	struct scatterlist *sg;
 	int i;
 
-	BUG_ON(dir == DMA_NONE);
-
-	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != sg_phys(sg))
-			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
-				    sg->dma_length, dir);
-	}
+	for_each_sg(sgl, sg, nelems, i)
+		swiotlb_sync_single_for_cpu(hwdev, sg->dma_address,
+					    sg->dma_length, dir);
 }
 EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
 
@@ -766,13 +767,9 @@ swiotlb_sync_sg_for_device(struct device
 	struct scatterlist *sg;
 	int i;
 
-	BUG_ON(dir == DMA_NONE);
-
-	for_each_sg(sgl, sg, nelems, i) {
-		if (sg->dma_address != sg_phys(sg))
-			sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
-				    sg->dma_length, dir);
-	}
+	for_each_sg(sgl, sg, nelems, i)
+		swiotlb_sync_single_for_device(hwdev, sg->dma_address,
+					       sg->dma_length, dir);
 }
 EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
 
--- sle11sp1-2010-03-22.orig/mm/init-mm.c	2010-03-22 12:07:54.000000000 +0100
+++ sle11sp1-2010-03-22/mm/init-mm.c	2009-11-06 10:52:09.000000000 +0100
@@ -8,6 +8,10 @@
 #include <asm/atomic.h>
 #include <asm/pgtable.h>
 
+#ifdef CONFIG_X86_XEN
+#define swapper_pg_dir ((pgd_t *)NULL)
+#endif
+
 struct mm_struct init_mm = {
 	.mm_rb		= RB_ROOT,
 	.pgd		= swapper_pg_dir,
--- sle11sp1-2010-03-22.orig/mm/memory.c	2010-03-11 09:18:45.000000000 +0100
+++ sle11sp1-2010-03-22/mm/memory.c	2010-03-11 09:20:02.000000000 +0100
@@ -1334,7 +1334,7 @@ int __get_user_pages(struct task_struct 
 					vmas[i] = vma;
 				i++;
 				start += PAGE_SIZE;
-				len--;
+				nr_pages--;
 				continue;
 			}
 		}
--- sle11sp1-2010-03-22.orig/mm/page_alloc.c	2010-03-22 12:25:41.000000000 +0100
+++ sle11sp1-2010-03-22/mm/page_alloc.c	2010-03-22 12:25:51.000000000 +0100
@@ -637,6 +637,7 @@ static void __free_pages_ok(struct page 
 
 #ifdef CONFIG_XEN
 	if (PageForeign(page)) {
+		WARN_ON(wasMlocked);
 		PageForeignDestructor(page, order);
 		return;
 	}
@@ -1143,6 +1144,7 @@ static void free_hot_cold_page(struct pa
 
 #ifdef CONFIG_XEN
 	if (PageForeign(page)) {
+		WARN_ON(wasMlocked);
 		PageForeignDestructor(page, 0);
 		return;
 	}