Sophie

Sophie

distrib > Mandriva > 8.2 > i586 > media > main-src > by-pkgid > 276c4c69ed5efcebe43ee6121194cf59 > files > 56

kernel22-2.2.20-9mdk.src.rpm

diff -urN 2.2.18pre17aa1/arch/i386/kernel/entry.S nmi/arch/i386/kernel/entry.S
--- 2.2.18pre17aa1/arch/i386/kernel/entry.S	Mon Oct 23 19:40:19 2000
+++ nmi/arch/i386/kernel/entry.S	Mon Oct 23 19:40:48 2000
@@ -309,9 +309,14 @@
 	jmp error_code
 
 ENTRY(nmi)
+	pushl %eax			/* presumably fills the orig_eax slot above SAVE_ALL's frame -- TODO confirm against SAVE_ALL */
+	SAVE_ALL
+	movl %esp,%edx			/* keep the stack pointer left by SAVE_ALL; it is handed to do_nmi below */
 	pushl $0
-	pushl $ SYMBOL_NAME(do_nmi)
-	jmp error_code
+	pushl %edx			/* pushed last, so it is do_nmi's first argument; the $0 above is the second */
+	call SYMBOL_NAME(do_nmi)	/* direct call replaces the old jump through error_code */
+	addl $8,%esp			/* drop the two 4-byte arguments pushed above */
+	RESTORE_ALL
 
 ENTRY(int3)
 	pushl $0
diff -urN 2.2.18pre17aa1/arch/i386/kernel/io_apic.c nmi/arch/i386/kernel/io_apic.c
--- 2.2.18pre17aa1/arch/i386/kernel/io_apic.c	Tue Sep  5 02:28:38 2000
+++ nmi/arch/i386/kernel/io_apic.c	Mon Oct 23 19:40:48 2000
@@ -21,6 +21,9 @@
  */
 #define IO_APIC_BASE(idx) ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx))
 
+static int nmi_pin __initdata = -1;	/* IO-APIC pin recorded for the NMI source IRQ; stays -1 until one is seen */
+int nmi_irq = -1;			/* IRQ used as NMI watchdog source ("nmi_irq=" boot option); negative = watchdog off */
+
 /*
  * The structure of the IO-APIC:
  */
@@ -638,6 +641,16 @@
 
 		if (!apic && !IO_APIC_IRQ(irq))
 			continue;
+		if (irq == nmi_irq) {
+			entry.delivery_mode = 4; /* broadcast NMI */
+			make_8259A_irq(irq);
+			/*
+			 * Remember which register has the NMI IRQ entry,
+			 * so we can turn it off in case there is some
+			 * screwup
+			 */
+			nmi_pin = pin;
+		}
 
 		entry.vector = assign_irq_vector(irq);
 
@@ -1196,6 +1209,8 @@
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
 	for (i = 0; i < NR_IRQS ; i++) {
+		if (i == nmi_irq)
+			continue;
 		if (IO_APIC_VECTOR(i) > 0) {
 			if (IO_APIC_irq_trigger(i))
 				irq_desc[i].handler = &ioapic_level_irq_type;
@@ -1237,6 +1252,8 @@
 {
 	int pin1, pin2;
 
+	if (nmi_irq != -1)
+		printk("NMI Watchdog activated on source IRQ %d\n", nmi_irq);
 	pin1 = find_timer_pin(mp_INT);
 	pin2 = find_timer_pin(mp_ExtINT);
 	enable_IO_APIC_irq(0);
@@ -1274,6 +1291,8 @@
 			}
 		}
 		printk(" works.\n");
+		if ((nmi_pin != -1) && (nmi_irq == 0))
+			printk("NMI Watchdog disabled (source IRQ was 0)!\n");
 	}
 }
 
diff -urN 2.2.18pre17aa1/arch/i386/kernel/smp.c nmi/arch/i386/kernel/smp.c
--- 2.2.18pre17aa1/arch/i386/kernel/smp.c	Mon Oct 23 19:40:18 2000
+++ nmi/arch/i386/kernel/smp.c	Mon Oct 23 19:40:48 2000
@@ -2064,6 +2064,8 @@
 	 */
 }
 
+unsigned int apic_timer_irqs[NR_CPUS];	/* per-CPU local APIC timer interrupt counts; sampled by the NMI watchdog */
+
 /*
  * Local APIC timer interrupt. This is the most natural way for doing
  * local interrupts, but local timer interrupts can be emulated by
@@ -2074,6 +2076,13 @@
  */
 void smp_apic_timer_interrupt(struct pt_regs * regs)
 {
+	extern int nmi_irq;
+	/*
+	 * the only thing that can lock an NMI is an unACK-ed APIC ...
+	 */
+	if (nmi_irq >= 0)
+		apic_timer_irqs[smp_processor_id()]++;
+
 	/*
 	 * NOTE! We'd better ACK the irq immediately,
 	 * because timer handling can be slow, and we
diff -urN 2.2.18pre17aa1/arch/i386/kernel/traps.c nmi/arch/i386/kernel/traps.c
--- 2.2.18pre17aa1/arch/i386/kernel/traps.c	Mon Oct 23 19:40:18 2000
+++ nmi/arch/i386/kernel/traps.c	Mon Oct 23 19:41:11 2000
@@ -5,6 +5,8 @@
  *
  *  Pentium III FXSR, SSE support
  *	Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ *  1998, Ingo Molnar, added NMI-Watchdog driver
  */
 
 /*
@@ -336,12 +338,90 @@
 	printk("Do you have a strange power saving mode enabled?\n");
 }
 
+#ifdef CONFIG_SMP
+static int __init setup_nmi_irq(char *str)	/* handler for the "nmi_irq=" boot option; always returns 1 */
+{
+	extern int nmi_irq;
+	int ints[11];
+
+	get_options(str, ints);
+	if (ints[0] == 1)		/* presumably "exactly one number parsed" -- confirm against get_options() */
+		nmi_irq = ints[1];	/* stored as given; no range check is done here */
+	return 1;
+}
+
+__setup("nmi_irq=", setup_nmi_irq);
+
+static void nmi_watchdog(struct pt_regs * regs)
+{
+	/*
+	 * The best way to detect whether a CPU has a 'hard lockup' problem
+	 * is to check its local APIC timer IRQ count. If the count is not
+	 * changing then that CPU has some problem.
+	 *
+	 * As these watchdog NMIs are broadcast to every CPU, here
+	 * we only have to check the current processor.
+	 *
+	 * Since NMIs don't listen to _any_ locks, we have to be extremely
+	 * careful not to rely on unsafe variables. The printk might lock
+	 * up though, so we have to break up console_lock first ...
+	 * [when there are more tty-related locks, break them up
+	 *  here too!]
+	 */
+	extern spinlock_t console_lock;
+	extern unsigned int apic_timer_irqs[NR_CPUS];
+	static unsigned int last_irq_sums[NR_CPUS], alert_counter[NR_CPUS];
+	static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED;
+
+	/*
+	 * Since current-> is always on the stack, and we always switch
+	 * the stack NMI-atomically, it's safe to use smp_processor_id().
+	 */
+	int sum, cpu = smp_processor_id();
+
+	sum = apic_timer_irqs[cpu];
+
+	if (last_irq_sums[cpu] == sum) {
+		/*
+		 * Ayiee, this CPU's timer count has not moved since the last NMI.
+		 * Oops only after 5*HZ such NMIs in a row (5 seconds at HZ NMIs/s).
+		 */
+		alert_counter[cpu]++;
+		if (alert_counter[cpu] == 5*HZ) {
+			spin_lock(&nmi_print_lock);
+			/*
+			 * We are in trouble anyway, so force console_lock open
+			 * (trylock, then unlock) to at least get a message out.
+			 */
+			(void) spin_trylock(&console_lock);
+			spin_unlock(&console_lock);
+			printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu);
+			show_registers(regs);
+			spin_unlock(&nmi_print_lock);
+			do_exit(SIGSEGV);
+		}
+	} else {			/* timer count advanced: record it and restart the stall count */
+		last_irq_sums[cpu] = sum;
+		alert_counter[cpu] = 0;
+	}
+}
+#endif
+
 asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
 {
 	unsigned char reason = inb(0x61);
 	extern atomic_t nmi_counter;
+#ifdef CONFIG_SMP
+	extern int nmi_irq;
+#endif
 
 	atomic_inc(&nmi_counter);
+#ifdef CONFIG_SMP
+	if (nmi_irq >= 0) {
+		nmi_watchdog(regs);
+		return;
+	}
+#endif
 	if (reason & 0x80)
 		mem_parity_error(reason, regs);
 	if (reason & 0x40)