[Search for users]
[Overall Top Noters]
[List of all Conferences]
[Download this site]
Title: | DIGITAL UNIX (FORMERLY KNOWN AS DEC OSF/1) |
Notice: | Welcome to the Digital UNIX Conference |
Moderator: | SMURF::DENHAM |
|
Created: | Thu Mar 16 1995 |
Last Modified: | Fri Jun 06 1997 |
Last Successful Update: | Fri Jun 06 1997 |
Number of topics: | 10068 |
Total number of notes: | 35879 |
9476.0. "Machine check 660 - local bus fault" by MGOF01::GVOMHAU () Fri Apr 11 1997 09:08
System crash with machine check 660
A new system (A2100A-LP) crashes with VET after 5-10 minutes.
We have already changed the following parts:
CPU, memory, IO module, CBUS module, ext. IO module, remote IO panel,
graphics card, kzpsa.
The software running is DUNIX 3.2G with patches on an internal disk (rz28).
The system also crashes from an external disk (rz29) on a kzpsa with
DUNIX 4.0A.
We stripped down the system so that only the necessary parts
are in it, and the system still crashes.
The only OLD things are the power-supply and cables.
Crashdata:
#
# Crash Data Collection (Version 1.4)
#
_crash_data_collection_time: Fri Apr 11 12:15:07 MET DST 1997
_current_directory: /
_crash_kernel: /var/adm/crash/vmunix.3
_crash_core: /var/adm/crash/vmcore.3
_crash_arch: alpha
_crash_os: Digital UNIX
_host_version: Digital UNIX V3.2G (Rev. 62); Fri Apr 11 10:37:32 MET
DST 1997
_crash_version: Digital UNIX V3.2G (Rev. 62); Fri Apr 11 10:37:32 MET
DST 1997
_crashtime: struct {
tv_sec = 860752150
tv_usec = 712480
}
_boottime: struct {
tv_sec = 860750348
tv_usec = 665632
}
_config: struct {
sysname = "OSF1"
nodename = "S13"
release = "V3.2"
version = "62"
machine = "alpha"
}
_cpu: 41
_system_string: 0xffffffffff8010b8 = "AlphaServer 2100A 5/300"
_ncpus: 1
_avail_cpus: 1
_partial_dump: 1
_physmem(MBytes): 127
_panic_string: 0xfffffc00004a1f10 = "System Uncorrectable Machine
Check 660"
_paniccpu: 0
_panic_thread: 0xfffffc0007d82b80
_preserved_message_buffer_begin:
struct {
msg_magic = 0x63061
msg_bufx = 0x1d1
msg_bufr = 0x436
msg_bufc = "em_err = 0000000000000000
mem_mcmd1 = e2000008e2000008
mem_mcmd2 = 0020059340200593
mem_mconf = 800150a8800150a8
mem_medc1 = 087c0833087c0833
mem_medc2 = 0000018000000180
mem_medcc = 2000000020000000
mem_msctl = 0000080000000800
mem_mref = 000001d8000001d8
mem_filter = 0000000000000000
panic (cpu 0): System Uncorrectable Machine Check 660
SIOP 0:DUMP.prom: dev SCSI 0 2001 0 3 300 0 0, block 131072
DUMP.prom: dev SCSI 0 2001 0 3 300 0 0, block 131072
sa0
lp0 at eisa0
fdi0 at eisa0
fd0 at fdi0 unit 0
pci2000 at pci0 slot 3
tu0: DECchip 21040-AA: Revision: 2.4
tu0 at pci2000 slot 0
tu0: DEC TULIP Ethernet Interface, hardware address: 00-00-F8-23-45-E4
tu0: console mode: selecting 10BaseT (UTP) port: half duplex: no link
psiop0 at pci2000 slot 1
Loading SIOP: script 1006200, reg 81800000, data 40520110
scsi0 at psiop0 slot 0
rz3 at scsi0 bus 0 target 3 lun 0 (DEC RZ28D (C) DEC 0008)
rz5 at scsi0 bus 0 target 5 lun 0 (DEC RRD45 (C) DEC 1645)
vga0 at pci0 slot 8
1024x768 (S3TRIO )
pci1 at ibus0 slot 1
dli: configured
Can't find OSF-BASE PAK
Machine Check SYSTEM Fatal Abort
pal temp[0-1] = 0000000484335d08 0000000000000003
pal temp[2-3] = fffffc00003ab420 0000000000004200
pal temp[4-5] = 000003ffc0903518 00000000003ffffd
pal temp[6-7] = 000000014000a940 fffffc00003aaea0
pal temp[8-9] = 1f1e161514020100 fffffc00003ab190
pal temp[10-11] = 0000000000014139 fffffc00003aaff0
pal temp[12-13] = fffffc00003ab390 0000012000000120
pal temp[14-15] = 0000000000000001 0000000000000000
pal temp[16-17] = 0000020306600001 0000000000000000
pal temp[18-19] = 000000011ffff4e0 ffffffff8830ba58
pal temp[20-21] = 0000000003512000 fffffc00003ab3c0
pal temp[22-23] = fffffc00004d1050 00000000030f1a58
shadow[0-1] = 0000000000000000 0000000000000000
shadow[2-3] = 0000000000000000 0000000000000000
shadow[4-5] = 0000000000000000 0000000000000000
shadow[6-7] = 0000000000000000 0000000000000000
Address of excepting instruction = 0000000000014139
Summary of arithmetic traps = 0000000000000000
Exception mask = 0000000000000000
Base address for PALcode = 0000000000014000
Interrupt Status Reg = 0000000000500000
CURRENT SETUP OF EV5 IBOX = 0000004160800000
I-CACHE Reg Tag parity error = 0000000000000000
D-CACHE error Reg = 0000000000000000
Effective VA = ffffffff8830ba28
reason for D-stream = 0000000000016ad1
EV5 Secondary Cache address = ffffff000787106f
EV5 Secondary Cache TAG/Data parity = 0000000000000000
EV5 BC_TAG_ADDR = ffffff80008d0fff
EV5 EI_STAT_ADDR Phys addr of Xfer = ffffff839a00001f
Fill Syndrome = 0000000000000007
EI_STAT reg = fffffff004ffffff
LD_LOCK = ffffff00004ccaef
CPU registers
CREG = 380003f238000002
ESREG = 0000000000000020
EVBCR = 0000006100000061
EVBEAR = 00680006001e35d0
EVBCER = 0000000000000000
EVBCEAR = b800000ab800000a
EVBUER = 9000000000420006
EVBUEAR = b800000e00285d68
EVRESV = 0000000000000000
DTCTR = 0000011100000111
DTER = 000000000085d680
DTTCR = 0000000000000000
DTTR = 8280000782800007
DTRESV = 0000000000000000
IBCSR = 0000100000001000
IBEAR = 4f200683e0000073
ACR = 0000012000000120
CBCR = 0000110100001001
CBER = 0000000000000000
CBEALR = 4f200683e0000093
CBEAHR = 0f200683e000009b
CBRESV = 0000000000000000
ALR = 004ccae1004ccae1
PMBR = 0000000000000000
IIRR = 0000000000000000
SICR = 0010001000000000
MRESV = 0000000000000000
PMR1 = 0000000000000000
PMR2 = 0000000000000000
PMR3 = 0000000000000000
PMR4 = 0000000000000000
PMR5 = 0000000000000000
T2 Registers
t2_iocsr = fe240e0d27020dc0
t2_cerr1 = 0000000000000001
t2_cerr2 = 00a175a000a175a0
t2_cerr3 = f083fffff083ffff
t2_perr1 = 0000000000080000
t2_perr2 = 00000006000a9ae8
t2_hae0_1 = 0000000000000010
t2_hae0_2 = 0000000000000000
t2_hbase = 000000000010603f
t2_wbase1 = 00000000400807ff
t2_wmask1 = 000000003ff00000
t2_tbase1 = 0000000000000000
t2_wbase2 = 00000000000c003f
t2_wmask2 = 0000000003f00000
t2_tbase2 = 00000000002b8000
Memory Module 0 Registers
m"
}
_preserved_message_buffer_end:
_kernel_process_status_begin:
PID COMM
00000 kernel idle
00001 init
00003 kloadsrv
00016 update
00103 syslogd
00105 binlogd
00196 portmap
00198 nfsiod
00199 nfsiod
00200 nfsiod
00201 nfsiod
00202 nfsiod
00203 nfsiod
00204 nfsiod
00252 sendmail
00302 inetd
00304 os_mibs
00308 snmpd
00310 cron
00330 lpd
00351 xdm
00354 Xdec
00355 getty
00356 xdm
00422 Xsession
00423 dxsession
00424 sh
00425 mwm
00426 dxconsole
00427 dxterm
00429 sh
00431 memx
00432 tail
00433 shmx
00434 memxr
00435 memxr
00436 memxr
00437 memxr
00438 memxr
00439 memxr
00440 memxr
00441 memxr
00442 memxr
00443 memxr
00444 memxr
00445 memxr
00446 memxr
00447 memxr
00448 memxr
00449 memxr
00450 memxr
00451 memxr
00452 memxr
00453 tail
00454 fsx
00455 tail
00456 shmxb
00457 fsxr
00458 fsxr
00459 fsxr
00460 fsxr
00461 fsxr
00462 fsxr
00463 fsxr
00464 fsxr
00465 fsxr
00466 fsxr
00467 fsxr
00468 fsxr
00469 fsxr
00470 fsxr
00471 fsxr
00472 fsxr
00473 fsxr
00474 fsxr
00475 fsxr
00476 fsxr
00477 fsxr
00478 fsxr
00479 fsxr
00480 fsxr
00481 fsxr
00482 fsxr
00483 fsxr
00484 fsxr
00485 fsxr
00486 fsxr
00487 fsxr
00488 fsxr
00489 fsxr
00490 fsxr
00491 fsxr
00492 fsxr
00493 fsxr
00494 fsxr
00495 fsxr
00496 fsxr
00497 fsxr
00498 fsxr
00499 fsxr
00500 fsxr
00501 fsxr
00502 fsxr
00503 fsxr
00504 fsxr
00505 fsxr
00506 fsxr
00507 diskx
00508 wc
00509 ps
_kernel_process_status_end:
_current_pid: 354
_current_tid: 0xfffffc0007d82b80
_proc_thread_list_begin:
thread 0xfffffc0007d82b80 stopped at [boot:1760 ,0xfffffc00003ae2cc]
Source
not available
_proc_thread_list_end:
_dump_begin:
> 0 boot(0x0, 0x4, 0xfffffc0000572000, 0x0, 0xfffffc0000000001)
["../../../../s
rc/kernel/arch/alpha/machdep.c":1760, 0xfffffc00003ae2cc]
1 panic(s = 0xfffffc00004ac6a8 = "LOCAL BUS FAULT\n")
["../../../../src/kerne
l/bsd/subr_prf.c":673, 0xfffffc000036e3e8]
pcpu = (nil)
i = 5376408
bootopt = 1
mycpu = 0
spl = 3
prevcc = 0
nextcc = 18446739675663040512
timer = -2010074360
limit = -4398044187855
2 psiop_hardintr(shp = 0xffffffff87fee000)
["../../../../src/kernel/io/cam/si
op/psiop.c":2086, 0xfffffc000042e2dc]
ctlr = 0xfffffc00004470dc
sjp = 0xfffffc00005209e8
pc = 4477672
i = 7
csr = 48
istat = ';'
dstat = '\240'
sstat0 = '^@'
restart = 0
r = 4194464
stat = 4194464
new_dsp = 3492184
module = 0xfffffc00004ac648 = "psiop_hardintr"
op = 237
i = 5376392
qp = 0xfffffc00004b0ef0
dbc = 5376488
dnad = 4613540
script_dbc = 2684354560
3 psiop_intr(0x0, 0xfffffc000041c390, 0xffffffff8830b696, 0xa, 0x0)
["../../.
./../src/kernel/io/cam/siop/pci/psiop_pci.c":1722, 0xfffffc0000433d80]
4 psiopintr(0x0, 0xadf20334e0916, 0xffffffff8830b696,
0xfffffc0000200100, 0x0
) ["../../../../src/kernel/io/cam/siop/pci/psiop_pci.c":2155,
0xfffffc00004342d4
]
5 intr_dispatch_post(0xfffffc0007fb0c40, 0x2, 0x1,
0xfffffc00003cd9a4, 0x3030
303030316430)
["../../../../src/kernel/arch/alpha/hal/shared_intr.c":238, 0xffff
fc00003d1758]
6 _XentInt(0x2, 0xfffffc00003bb6f4, 0xfffffc00004d1050, 0x2,
0xfffffc00004cae
58) ["../../../../src/kernel/arch/alpha/locore.s":934,
0xfffffc00003ab06c]
7 swap_ipl(0x2, 0xfffffc00003bb6f4, 0xfffffc00004d1050, 0x2,
0xfffffc00004cae
58) ["../../../../src/kernel/arch/alpha/spl.s":131, 0xfffffc00003bb6f0]
8 boot(0x0, 0x0, 0xfffffc00004a1f10, 0xfffffc0000526000, 0x10b)
["../../../..
/src/kernel/arch/alpha/machdep.c":1674, 0xfffffc00003ae144]
9 panic(s = 0xfffffc00004a1f10 = "System Uncorrectable Machine Check
660") ["
../../../../src/kernel/bsd/subr_prf.c":757, 0xfffffc000036e5a4]
pcpu = 0xfffffc0000528030
i = 4035620
bootopt = 1074468232
mycpu = 1
spl = 7
prevcc = 1
nextcc = 18446739675665362897
timer = 0
limit = -2010087424
10 kn470_machcheck(0x660, 0xfffffc0000006060, 0xffffffff8830b950,
0xacc10334e0
916, 0x0) ["../../../../src/kernel/arch/alpha/hal/kn470.c":821,
0xfffffc00003d94
94]
11 mach_error(0x1, 0xfffffc0000006060, 0xffffffff8830b950, 0x1,
0xffffffff8830
b950) ["../../../../src/kernel/arch/alpha/hal/cpusw.c":883,
0xfffffc00003cad74]
12 _XentInt(0x8, 0x3ff81f421ec, 0x3ffc096a940, 0x484302118, 0x5)
["../../../..
/src/kernel/arch/alpha/locore.s":997, 0xfffffc00003ab0fc]
_dump_end:
warning: Files compiled -g3: parameter values probably wrong
_kernel_thread_list_begin:
thread 0xfffffc0007f7c000 stopped at [thread_run:2287
+0x2c,0xfffffc00003a1358
] Source not available
thread 0xfffffc0007f7c400 stopped at [thread_block:1934
,0xfffffc00003a0bd8]
Source not available
thread 0xfffffc0007fae800 stopped at [thread_block:1934
,0xfffffc00003a0bd8]
Source not available
thread 0xfffffc0007faec00 stopped at [thread_block:1919
+0x28,0xfffffc00003a0b
68] Source not available
thread 0xfffffc0007faf000 stopped at [thread_block:1919
+0x28,0xfffffc00003a0b
68] Source not available
thread 0xfffffc0007faf400 stopped at [thread_block:1934
,0xfffffc00003a0bd8]
Source not available
thread 0xfffffc0007faf800 stopped at [thread_block:1934
,0xfffffc00003a0bd8]
Source not available
thread 0xfffffc0007dc2000 stopped at [thread_block:1919
+0x28,0xfffffc00003a0b
68] Source not available
thread 0xfffffc0007dc2800 stopped at [thread_block:1934
,0xfffffc00003a0bd8]
Source not available
thread 0xfffffc0007dc2c00 stopped at [thread_block:1934
,0xfffffc00003a0bd8]
Source not available
thread 0xfffffc0007dc3000 stopped at [thread_block:1934
,0xfffffc00003a0bd8]
Source not available
thread 0xfffffc0007dc3400 stopped at [thread_block:1919
+0x28,0xfffffc00003a0b
68] Source not available
thread 0xfffffc0007dc3800 stopped at [thread_block:1919
+0x28,0xfffffc00003a0b
68] Source not available
thread 0xfffffc0007dc3c00 stopped at [thread_block:1919
+0x28,0xfffffc00003a0b
68] Source not available
thread 0xfffffc0007da2000 stopped at [thread_block:1919
+0x28,0xfffffc00003a0b
68] Source not available
thread 0xfffffc0007da2400 stopped at [thread_block:1934
,0xfffffc00003a0bd8]
Source not available
_kernel_thread_list_end:
_savedefp: (nil)
_kernel_memory_fault_data_begin:
struct {
fault_va = 0x0
fault_pc = 0x0
fault_ra = 0x0
fault_sp = 0x0
access = 0x0
status = 0x0
cpunum = 0x0
count = 0x0
pcb = (nil)
thread = (nil)
task = (nil)
proc = (nil)
}
_kernel_memory_fault_data_end:
Invalid character in input
_uptime: .50 hours
paniccpu: 0x0
machine_slot[paniccpu]: struct {
is_cpu = 0x1
cpu_type = 0xf
cpu_subtype = 0x18
running = 0x1
cpu_ticks = {
[0] 0x92f1
[1] 0x0
[2] 0x5f14
[3] 0x1a558c
[4] 0xf749
}
clock_freq = 0x400
error_restart = 0x0
cpu_panicstr = 0xfffffc00004a1f10 = "System Uncorrectable Machine
Check 660"
cpu_panic_thread = 0xfffffc0007d82b80
}
tset machine_slot[paniccpu].cpu_panic_thread:
Begin Trace for machine_slot[paniccpu].cpu_panic_thread:
> 0 boot(0x0, 0x4, 0xfffffc0000572000, 0x0, 0xfffffc0000000001)
["../../../../s
rc/kernel/arch/alpha/machdep.c":1760, 0xfffffc00003ae2cc]
1 panic(s = 0xfffffc00004ac6a8 = "LOCAL BUS FAULT\n")
["../../../../src/kerne
l/bsd/subr_prf.c":673, 0xfffffc000036e3e8]
2 psiop_hardintr(shp = 0xffffffff87fee000)
["../../../../src/kernel/io/cam/si
op/psiop.c":2086, 0xfffffc000042e2dc]
3 psiop_intr(0x0, 0xfffffc000041c390, 0xffffffff8830b696, 0xa, 0x0)
["../../.
./../src/kernel/io/cam/siop/pci/psiop_pci.c":1722, 0xfffffc0000433d80]
4 psiopintr(0x0, 0xadf20334e0916, 0xffffffff8830b696,
0xfffffc0000200100, 0x0
) ["../../../../src/kernel/io/cam/siop/pci/psiop_pci.c":2155,
0xfffffc00004342d4
]
5 intr_dispatch_post(0xfffffc0007fb0c40, 0x2, 0x1,
0xfffffc00003cd9a4, 0x3030
303030316430)
["../../../../src/kernel/arch/alpha/hal/shared_intr.c":238, 0xffff
fc00003d1758]
6 _XentInt(0x2, 0xfffffc00003bb6f4, 0xfffffc00004d1050, 0x2,
0xfffffc00004cae
58) ["../../../../src/kernel/arch/alpha/locore.s":934,
0xfffffc00003ab06c]
7 swap_ipl(0x2, 0xfffffc00003bb6f4, 0xfffffc00004d1050, 0x2,
0xfffffc00004cae
58) ["../../../../src/kernel/arch/alpha/spl.s":131, 0xfffffc00003bb6f0]
8 boot(0x0, 0x0, 0xfffffc00004a1f10, 0xfffffc0000526000, 0x10b)
["../../../..
/src/kernel/arch/alpha/machdep.c":1674, 0xfffffc00003ae144]
9 panic(s = 0xfffffc00004a1f10 = "System Uncorrectable Machine Check
660") ["
../../../../src/kernel/bsd/subr_prf.c":757, 0xfffffc000036e5a4]
10 kn470_machcheck(0x660, 0xfffffc0000006060, 0xffffffff8830b950,
0xacc10334e0
916, 0x0) ["../../../../src/kernel/arch/alpha/hal/kn470.c":821,
0xfffffc00003d94
94]
11 mach_error(0x1, 0xfffffc0000006060, 0xffffffff8830b950, 0x1,
0xffffffff8830
b950) ["../../../../src/kernel/arch/alpha/hal/cpusw.c":883,
0xfffffc00003cad74]
12 _XentInt(0x8, 0x3ff81f421ec, 0x3ffc096a940, 0x484302118, 0x5)
["../../../..
/src/kernel/arch/alpha/locore.s":997, 0xfffffc00003ab0fc]
End Trace for machine_slot[paniccpu].cpu_panic_thread:
"cpu_data" is not an array
_stack_trace[0]_begin:
> 0 boot(0x0, 0x4, 0xfffffc0000572000, 0x0, 0xfffffc0000000001)
["../../../../s
rc/kernel/arch/alpha/machdep.c":1760, 0xfffffc00003ae2cc]
1 panic(s = 0xfffffc00004ac6a8 = "LOCAL BUS FAULT\n")
["../../../../src/kerne
l/bsd/subr_prf.c":673, 0xfffffc000036e3e8]
2 psiop_hardintr(shp = 0xffffffff87fee000)
["../../../../src/kernel/io/cam/si
op/psiop.c":2086, 0xfffffc000042e2dc]
3 psiop_intr(0x0, 0xfffffc000041c390, 0xffffffff8830b696, 0xa, 0x0)
["../../.
./../src/kernel/io/cam/siop/pci/psiop_pci.c":1722, 0xfffffc0000433d80]
4 psiopintr(0x0, 0xadf20334e0916, 0xffffffff8830b696,
0xfffffc0000200100, 0x0
) ["../../../../src/kernel/io/cam/siop/pci/psiop_pci.c":2155,
0xfffffc00004342d4
]
5 intr_dispatch_post(0xfffffc0007fb0c40, 0x2, 0x1,
0xfffffc00003cd9a4, 0x3030
303030316430)
["../../../../src/kernel/arch/alpha/hal/shared_intr.c":238, 0xffff
fc00003d1758]
6 _XentInt(0x2, 0xfffffc00003bb6f4, 0xfffffc00004d1050, 0x2,
0xfffffc00004cae
58) ["../../../../src/kernel/arch/alpha/locore.s":934,
0xfffffc00003ab06c]
7 swap_ipl(0x2, 0xfffffc00003bb6f4, 0xfffffc00004d1050, 0x2,
0xfffffc00004cae
58) ["../../../../src/kernel/arch/alpha/spl.s":131, 0xfffffc00003bb6f0]
8 boot(0x0, 0x0, 0xfffffc00004a1f10, 0xfffffc0000526000, 0x10b)
["../../../..
/src/kernel/arch/alpha/machdep.c":1674, 0xfffffc00003ae144]
9 panic(s = 0xfffffc00004a1f10 = "System Uncorrectable Machine Check
660") ["
../../../../src/kernel/bsd/subr_prf.c":757, 0xfffffc000036e5a4]
10 kn470_machcheck(0x660, 0xfffffc0000006060, 0xffffffff8830b950,
0xacc10334e0
916, 0x0) ["../../../../src/kernel/arch/alpha/hal/kn470.c":821,
0xfffffc00003d94
94]
11 mach_error(0x1, 0xfffffc0000006060, 0xffffffff8830b950, 0x1,
0xffffffff8830
b950) ["../../../../src/kernel/arch/alpha/hal/cpusw.c":883,
0xfffffc00003cad74]
12 _XentInt(0x8, 0x3ff81f421ec, 0x3ffc096a940, 0x484302118, 0x5)
["../../../..
/src/kernel/arch/alpha/locore.s":997, 0xfffffc00003ab0fc]
_stack_trace[0]_end:
/usr/bin/crashdc: /bin/kdbx: not found
#
_crash_data_collection_finished:
So what is wrong with this machine??????????
I don't have any more ideas.
G. vom Hau MCS Frankfurt
T.R | Title | User | Personal Name | Date | Lines
|
---|