
wada.shinichiro at jp
Mar 13, 2012, 1:20 AM
Post #3 of 7
(346 views)
Permalink
|
$B>>Hx$5$s(B $B$3$s$K$A$O!#(B $BOBED$G$9!#(B $B%3%a%s%H$*$h$S$4;XE&$"$j$,$H$&$4$6$$$^$9!#(B diskd$B$N%Q%C%A$O;29M$K$5$;$FD:$-$?$$$H;W$$$^$9!#(B diskd$B!J%j%=!<%9!K$N [at] _D$K4X$7$F>/$7$h$/$o$+$C$F$J$$$N$G$465<($/$@$5$$!#(B > diskd $B$K(B SUCCESS $B$H$$$&B0@-$O$J$/!"(B > diskd $B$O!"@5>o;~$O!"(Bnormal, $B0[>o;~$O(B ERROR $B$K$J$j$^$9!#(B $B$H$"$j$^$9$,!"(BOCF_SUCCESS$B$rJV5Q$7$F$$$?$N$G!"(BSUCCESS$B$+$H(B $B;W$C$F$$$?$N$G$9$,!"G'<1$,0c$C$F$$$?$h$&$G$9$M!#(B $B!t(Bpingd$B$OL7=b$7$F$$$^$9$M!#!#!#(B $B3F%j%=!<%9$NB0@-$O$I$N$h$&$K3NG'$9$l$P$h$$$N$G$7$g$&$+!)(B diskd$B$N(Brule$B$O!&!&!&JQ$G$9$M!#(B $B!t(Bpingd$B$b$h$m$7$/$O$J$$$G$9$M!#!#!#(B >> rule -INFINITY: defined ping_set and ping_set lt 200 \ >> rule -INFINITY: defined diskd_set and diskd_set eq SUCCESS $B$G$O$J$/!"(B rule -INFINITY: not_defined ping_set or ping_set lt 200 \ rule -INFINITY: not_defined diskd_set or diskd_set eq ERROR $B$H$9$Y$-$J$N$G$9$M!#(B $B$h$m$7$/$*4j$$CW$7$^$9!#(B > $BOBED$5$s(B > > $B>>Hx$G$9!#(B > > monitor $B$N(B interval $B$O(B monitor $B$,<B9T$5$l$k4V3V$J$N$G!"(B > star t$B40N;8e$O$*$=$i$/4X78$J$$$H;W$$$^$9!#(B > $B6D$C$F$$$kDL$j!"(Bstart$B;~$K!"(Bpid $B%U%!%$%k$N:n@.BT$A$r$9$kI,MW$,$"$j$=$&$G$9$M!#(B > > $BBT$A9g$o$;$k%Q%C%A=q$/$H$7$?$i!"$3$s$J46$8$G$7$g$&$+!#(B > > /usr/lib/ocf/resource.d/pacemaker/diskd > ------------------------------------------------------------------------------- > --- A/diskd 2010-11-25 18:37:15.000000000 +0900 > +++ B/diskd 2012-03-13 15:18:32.000000000 +0900 > @@ -145,6 +145,15 @@ > $diskd_cmd > rc=$? > if [ $rc = 0 ]; then > + while : > + do > + diskd_monitor > + if [ $? -eq $OCF_SUCCESS ]; then > + break; > + fi > + ocf_log warn "$diskd_cmd still hasn't started yet. Waiting..." 
> + sleep 1 > + done > exit $OCF_SUCCESS > fi > -------------------------------------------------------------------------------- > > > $B$A$J$_$K!"@_Dj%U%!%$%k8+$F5$$E$$$?$N$G$9$,!"(B > diskd $B$K(B SUCCESS $B$H$$$&B0@-$O$J$/!"(B > diskd $B$O!"@5>o;~$O!"(Bnormal, $B0[.>o;~$O(B ERROR $B$K$J$j$^$9!#(B > $B$h$C$F0J2<$N [at] _D$O0UL#$,$J$$$H;W$$$^$9!#(B(2$B2U=j(B) > ---- > diskd_set eq SUCCESS > ---- > > > $B$5$i$K!"@_Dj$+$i?dB,$9$k$H!"%G%#%9%/8N>c;~$K(B rule -INFINITY $B$H$7$?$$$H(B > $B;W$o$l$^$9$N$G!"$3$N>l9g$O(BSUCCESS$B$N=j$r(BERROR$B$KCV$-49$($k$N$,(B > $B$h$$$H;W$$$^$9!#(B > $B"((B normal $B$@$H(B $B%G%#%9%/@5>o;~$K%j%=!<%9$,F0$1$J$/$J$k$H$$$&0UL#$K$J$C$F$7$^$$$^$9(B > > $B0J>e$G$9!#(B > > > > 2012$BG/(B3$B7n(B12$BF|(B21:59 $BOBED!!?-0lO/(B<wada.shinichiro [at] jp>: >> $B$3$s$K$A$O!#(B >> $BOBED$G$9!#(B >> >> $B$$$D$b$*@$OC$K$J$C$F$*$j$^$9!#(B >> >> Active/Passive$B9=@.$G(Bpm_diskd$B$rMQ$$$F4F;k$r9T$C$F$$$^$9$,!"(B >> $B%(%i!<$,H/@8$7$?$h$&$G5/F0$G$-$^$;$s$G$7$?!#(B >> >> $B%m%0$r8+$?$H$3$m!"!J$"$^$j$o$+$C$F$J$$$N$G$9$,!#!#!#!K(B >> $B5/F0$,40N;$9$kA0$K(Bmoniter$B$,<B9T$5$l!"!V(Bnot running$B!W$HH=Dj$5$l$?$h$&$K8+$($^$9!#(B >> >> $B$^$?!"%=!<%9$r8+$?$H$3$m!"(Bdiskd$B$N%G!<%b%s$,(Bfork$B$7$?$"$H$K(Bexit$B$7$F$$$k$h$&$G$7$?$N$G!"(B >> diskd$B$N%9%/%j%W%H$N(Bstart$B$G(Bpid$B%U%!%$%k$N:n@.$rBT$A9g$o$;$r$7$J$$$H!"%?%$%_%s%0$K$h$C$F$O(B >> $B$3$N$h$&$J$9$l0c$$$,H/@8$7$J$$$+$H$$$&E@$,5$$K$J$k$N$G$9$,!"(Bmonitor$B$O(Bstart$B40N;8e!"(B >> interval$B7P2a;~4V$rBT$?$:$K<B9T$5$l$k$N$G$7$g$&$+!)(B >> >> $B$3$N>uBV$H$J$C$?>l9g!"(Bdiskd$B$,5/F0$9$k$3$H$J$/!"$^$?!"(BFailed Actions$B$K$b(B >> $BI=<($5$l$^$;$s$G$7$?!#(B >> $B$3$NF0:n$O@5$7$$F0:n$J$N$G$7$g$&$+!)(B >> $B$^$?!"%a!<%k$N:G8e$N5-:\$7$F$*$j$^$9$,@_Dj$G8+D>$9E@$,$J$$$+$465<(4j$$$^$9!#(B >> >> pm_diskd$B$N%P!<%8%g%s$O(B1.0.1$B$K$J$j$^$9!#(B >> $B5/F0;~$N%m%0$rH4?h$9$k$H0J2<$K$J$j$^$9!#(B >> >> ------------------------------------------------------------------------- >> >> Mar 12 18:00:03 it13 diskd: [21118]: info: Invoked: 
/usr/lib64/heartbeat/diskd -D -p /var/run//diskd-diskd_set -a diskd_set -i 30 -N /dev/sda1 >> Mar 12 18:00:03 it13 crmd: [20769]: info: process_lrm_event: LRM operation prmDiskd:0_start_0 (call=16, rc=0, cib-update=44, confirmed=true) ok >> Mar 12 18:00:03 it13 crmd: [20769]: info: match_graph_event: Action prmDiskd:0_start_0 (53) confirmed on it13 (rc=0) >> Mar 12 18:00:03 it13 crmd: [20769]: info: te_rsc_command: Initiating action 54: monitor prmDiskd:0_monitor_10000 on it13 (local) >> Mar 12 18:00:03 it13 crmd: [20769]: info: do_lrm_rsc_op: Performing key=54:1:0:334535ec-732d-47d4-ac94-98cc23fd5911 op=prmDiskd:0_monitor_10000 ) >> Mar 12 18:00:03 it13 lrmd: [20766]: info: rsc:prmDiskd:0:19: monitor >> Mar 12 18:00:03 it13 crmd: [20769]: info: process_lrm_event: LRM operation prmDiskd:0_monitor_10000 (call=19, rc=7, cib-update=45, confirmed=false) not running >> Mar 12 18:00:03 it13 crmd: [20769]: WARN: status_from_rc: Action 54 (prmDiskd:0_monitor_10000) on it13 failed (target: 0 vs. 
rc: 7): Error >> Mar 12 18:00:03 it13 crmd: [20769]: WARN: update_failcount: Updating failcount for prmDiskd:0 on it13 after failed monitor: rc=7 (update=value++, time=1331542803) >> Mar 12 18:00:03 it13 crmd: [20769]: info: abort_transition_graph: match_graph_event:291 - Triggered transition abort (complete=0, tag=lrm_rsc_op, id=prmDiskd:0_monitor_10000, magic=0:7;54:1:0:334535ec-732d-47d4-ac94-98cc23fd5911, cib=0.64.32) : Event failed >> Mar 12 18:00:03 it13 crmd: [20769]: info: update_abort_priority: Abort priority upgraded from 0 to 1 >> Mar 12 18:00:03 it13 crmd: [20769]: info: update_abort_priority: Abort action done superceeded by restart >> Mar 12 18:00:03 it13 attrd: [20767]: info: find_hash_entry: Creating hash entry for fail-count-prmDiskd:0 >> Mar 12 18:00:03 it13 crmd: [20769]: info: match_graph_event: Action prmDiskd:0_monitor_10000 (54) confirmed on it13 (rc=4) >> Mar 12 18:00:03 it13 attrd: [20767]: info: attrd_local_callback: Expanded fail-count-prmDiskd:0=value++ to 1 >> Mar 12 18:00:03 it13 attrd: [20767]: info: attrd_trigger_update: Sending flush op to all hosts for: fail-count-prmDiskd:0 (1) >> Mar 12 18:00:03 it13 diskd: [21144]: info: attrd_lazy_update: Connecting to cluster... 
5 retries remaining >> Mar 12 18:00:03 it13 diskd: [21144]: info: main: Starting diskd >> Mar 12 18:00:03 it13 crmd: [20769]: info: abort_transition_graph: te_update_diff:150 - Triggered transition abort (complete=0, tag=nvpair, id=status-it13-fail-count-prmDiskd:0, magic=NA, cib=0.64.33) : Transient attribute: update >> Mar 12 18:00:03 it13 crmd: [20769]: info: update_abort_priority: Abort priority upgraded from 1 to 1000000 >> Mar 12 18:00:03 it13 crmd: [20769]: info: update_abort_priority: 'Event failed' abort superceeded >> Mar 12 18:00:03 it13 attrd: [20767]: info: attrd_perform_update: Sent update 20: last-failure-prmDiskd:0=1331542803 >> Mar 12 18:00:03 it13 attrd: [20767]: info: find_hash_entry: Creating hash entry for diskd_set >> Mar 12 18:00:03 it13 attrd: [20767]: info: attrd_trigger_update: Sending flush op to all hosts for: diskd_set (normal) >> Mar 12 18:00:03 it13 crmd: [20769]: info: abort_transition_graph: te_update_diff:150 - Triggered transition abort (complete=0, tag=nvpair, id=status-it13-last-failure-prmDiskd:0, magic=NA, cib=0.64.34) : Transient attribute: update >> Mar 12 18:00:03 it13 attrd: [20767]: info: attrd_perform_update: Sent update 23: diskd_set=normal >> Mar 12 18:00:03 it13 crmd: [20769]: info: abort_transition_graph: te_update_diff:150 - Triggered transition abort (complete=0, tag=nvpair, id=status-it13-diskd_set, magic=NA, cib=0.64.35) : Transient attribute: update >> Mar 12 18:00:03 it13 lrmd: [20766]: info: RA output: (prmDrbd:0:start:stdout) >> Mar 12 18:00:03 it13 lrmd: [20766]: info: RA output: (prmDrbd:0:start:stdout) >> Mar 12 18:00:03 it13 lrmd: [20766]: info: RA output: (prmDrbd:0:start:stdout) >> Mar 12 18:00:04 it13 lrmd: [20766]: info: RA output: (prmDrbd:0:start:stdout) >> Mar 12 18:00:04 it13 lrmd: [20766]: info: RA output: (prmDrbd:0:start:stdout) >> Mar 12 18:00:04 it13 lrmd: [20766]: info: RA output: (prmDrbd:0:start:stdout) >> Mar 12 18:00:04 it13 crmd: [20769]: info: process_lrm_event: LRM operation 
prmDrbd:0_start_0 (call=18, rc=0, cib-update=46, confirmed=true) ok >> Mar 12 18:00:04 it13 crmd: [20769]: info: match_graph_event: Action prmDrbd:0_start_0 (25) confirmed on it13 (rc=0) >> 　・ >> 　・ >> 　・ >> (省略) >> 　・ >> 　・ >> 　・ >> Mar 12 18:00:17 it13 crmd: [20769]: info: do_lrm_rsc_op: Performing key=111:2:0:334535ec-732d-47d4-ac94-98cc23fd5911 op=prmDrbd:0_notify_0 ) >> Mar 12 18:00:17 it13 lrmd: [20766]: info: rsc:prmDrbd:0:24: notify >> Mar 12 18:00:17 it13 crmd: [20769]: info: te_rsc_command: Initiating action 113: notify prmDrbd:1_pre_notify_promote_0 on it14 >> Mar 12 18:00:17 it13 crmd: [20769]: info: te_rsc_command: Initiating action 73: stop prmDiskd:0_stop_0 on it13 (local) >> Mar 12 18:00:17 it13 lrmd: [20766]: info: cancel_op: operation monitor[19] on ocf::diskd::prmDiskd:0 for client 20769, its parameters: CRM_meta_clone=[0] device=[/dev/sda1] name=[diskd_set] CRM_meta_clone_node_max=[1] CRM_meta_clone_max=[2] CRM_meta_notify=[false] CRM_meta_globally_unique=[false] crm_feature_set=[3.0.1] CRM_meta_on_fail=[restart] CRM_meta_name=[monitor] CRM_meta_interval=[10000] CRM_meta_timeout=[60000] cancelled >> Mar 12 18:00:17 it13 crmd: [20769]: info: do_lrm_rsc_op: Performing key=73:2:0:334535ec-732d-47d4-ac94-98cc23fd5911 op=prmDiskd:0_stop_0 ) >> Mar 12 18:00:17 it13 lrmd: [20766]: info: rsc:prmDiskd:0:25: stop >> Mar 12 18:00:17 it13 crmd: [20769]: info: process_lrm_event: LRM operation prmDiskd:0_monitor_10000 (call=19, status=1, cib-update=0, confirmed=true) Cancelled >> Mar 12 18:00:17 it13 pengine: [20768]: info: process_pe_message: Transition 2: PEngine Input stored in: /var/lib/pengine/pe-input-13774.bz2 >> Mar 12 18:00:17 it13 diskd: [21144]: info: diskd_shutdown: Exiting >> Mar 12 18:00:17 it13 diskd: [21144]: info: main: Exiting diskd >> >> ------------------------------------------------------------------------- >> >> なお、構成は以前質問させて頂いたときとほぼ同様で、Corosync + Pacemaker + 
DRBD$B$G(B >> $B0J2<$N9=@.$H$J$C$F$$$^$9!#(B >> >> ------------------------------------------------------------------------- >> >> primitive drbd_db ocf:linbit:drbd \ >> params drbd_resource="pgsql" \ >> op start interval="0s" timeout="240s" on-fail="restart" \ >> op monitor interval="11s" timeout="60s" on-fail="restart" \ >> op monitor interval="10s" timeout="60s" on-fail="restart" role="Master" \ >> op stop interval="0s" timeout="100s" on-fail="fence" >> >> primitive ip_db ocf:heartbeat:IPaddr2 \ >> params ip="192.168.1.175" \ >> nic="eth1" \ >> cidr_netmask="24" \ >> op start interval="0s" timeout="90s" on-fail="restart" \ >> op monitor interval="10s" timeout="60s" on-fail="restart" \ >> op stop interval="0s" timeout="100s" on-fail="fence" >> >> primitive prmPing ocf:pacemaker:ping \ >> params \ >> name="ping_set" \ >> host_list="192.168.1.1 192.168.2.1" \ >> multiplier="100" \ >> dampen="0" \ >> meta \ >> migration-threshold="3" \ >> failure-timeout="60s" \ >> op start interval="0s" timeout="90s" on-fail="restart" \ >> op monitor interval="10s" timeout="60s" on-fail="restart" \ >> op stop interval="0s" timeout="100s" on-fail="ignore" >> >> primitive fs_db ocf:heartbeat:Filesystem \ >> params device="/dev/drbd/by-res/pgsql" directory="/data" fstype="ext4" \ >> op start interval="0s" timeout="60s" on-fail="restart" \ >> op monitor interval="10s" timeout="60s" on-fail="restart" \ >> op stop interval="0s" timeout="60s" on-fail="fence" >> >> primitive prmPg ocf:heartbeat:pgsql \ >> params pgctl="/usr/bin/pg_ctl" \ >> start_opt="-p 5432" \ >> psql="/usr/bin/psql" \ >> pgdata="/data/" \ >> pgdba="postgres" \ >> pgport="5432" \ >> pgdb="postgres" \ >> op start interval="0s" timeout="120s" on-fail="restart" \ >> op monitor interval="10s" timeout="60s" on-fail="restart" \ >> op stop interval="0s" timeout="120s" on-fail="fence" >> >> primitive apache ocf:heartbeat:apache \ >> params configfile="/etc/httpd/conf/httpd.conf" \ >> port="80" \ >> op start interval="0s" 
timeout="40s" on-fail="restart" \ >> op monitor interval="10s" timeout="60s" on-fail="restart" \ >> op stop interval="0s" timeout="60s" on-fail="fence" >> >> primitive prmDiskd ocf:pacemaker:diskd \ >> params name="diskd_set" \ >> device="/dev/sda1" \ >> op start interval="0s" timeout="60s" on-fail="restart" \ >> op monitor interval="10s" timeout="60s" on-fail="restart" \ >> op stop interval="0s" timeout="60s" on-fail="ignore" >> >> primitive prmStonith1-1 stonith:external/stonith-helper \ >> params \ >> priority="1" \ >> stonith-timeout="60s" \ >> hostlist="it13" \ >> dead_check_target="192.168.1.173" \ >> run_standby_wait="no" \ >> op start interval="0s" timeout="60s" \ >> op monitor interval="3600s" timeout="60s" \ >> op stop interval="0s" timeout="60s" >> >> primitive prmStonith1-2 stonith:external/ssh \ >> params \ >> priority="2" \ >> stonith-timeout="60s" \ >> hostlist="it13" \ >> op start interval="0s" timeout="60s" \ >> op monitor interval="3600s" timeout="60s" \ >> op stop interval="0s" timeout="60s" >> >> primitive prmStonith1-3 stonith:meatware \ >> params \ >> priority="3" \ >> stonith-timeout="600" \ >> hostlist="it13" \ >> op start interval="0s" timeout="60s" \ >> op monitor interval="3600s" timeout="60s" \ >> op stop interval="0s" timeout="60s" >> >> primitive prmStonith2-1 stonith:external/stonith-helper \ >> params \ >> priority="1" \ >> stonith-timeout="60s" \ >> hostlist="it14" \ >> dead_check_target="192.168.1.174" \ >> run_standby_wait="no" \ >> op start interval="0s" timeout="60s" \ >> op monitor interval="3600s" timeout="60s" \ >> op stop interval="0s" timeout="60s" >> >> primitive prmStonith2-2 stonith:external/ssh \ >> params \ >> priority="2" \ >> stonith-timeout="60s" \ >> hostlist="it14" \ >> op start interval="0s" timeout="60s" \ >> op monitor interval="3600s" timeout="60s" \ >> op stop interval="0s" timeout="60s" >> >> primitive prmStonith2-3 stonith:meatware \ >> params \ >> priority="3" \ >> stonith-timeout="600" \ >> 
hostlist="it14" \ >> op start interval="0s" timeout="60s" \ >> op monitor interval="3600s" timeout="60s" \ >> op stop interval="0s" timeout="60s" >> >> group group_all fs_db ip_db prmPg apache >> >> group grpStonith1 \ >> prmStonith1-1 \ >> prmStonith1-2 \ >> prmStonith1-3 >> >> group grpStonith2 \ >> prmStonith2-1 \ >> prmStonith2-2 \ >> prmStonith2-3 >> >> ms ms_drbd_db drbd_db \ >> meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true" >> >> clone clnPing prmPing \ >> meta clone-max="2" clone-node-max="1" >> >> clone clnDiskd prmDiskd \ >> meta clone-max="2" clone-node-max="1" >> >> location group_all-location group_all \ >> rule 200: #uname eq it13 \ >> rule 100: #uname eq it14 \ >> rule -INFINITY: defined ping_set and ping_set lt 200 \ >> rule -INFINITY: defined diskd_set and diskd_set eq SUCCESS >> >> location master-location_db ms_drbd_db \ >> rule 200: #uname eq it13 \ >> rule 100: #uname eq it14 \ >> rule role=master -INFINITY: defined ping_set and ping_set lt 200 \ >> rule role=master -INFINITY: defined diskd_set and diskd_set eq SUCCESS \ >> rule role=master -INFINITY: defined fail-count-fs_db \ >> rule role=master -INFINITY: defined fail-count-ip_db \ >> rule role=master -INFINITY: defined fail-count-prmPg \ >> rule role=master -INFINITY: defined fail-count-apache >> >> location rsc_location-grpStonith1-1 grpStonith1 \ >> rule -INFINITY: #uname eq it13 >> >> location rsc_location-grpStonith2-1 grpStonith2 \ >> rule -INFINITY: #uname eq it14 >> >> colocation db_on_drbd INFINITY: group_all ms_drbd_db:Master >> colocation clnPing-colocation INFINITY: group_all clnPing >> colocation clnDiskd-colocation INFINITY: group_all clnDiskd >> order order_db_after_drbd INFINITY: ms_drbd_db:promote group_all:start >> order order_clnPing_after_all 0: clnPing group_all symmetrical=false >> order order_clnDiskd_after_all 0: clnDiskd group_all symmetrical=false >> >> property no-quorum-policy="ignore" \ >> stonith-enabled="true" \ >> 
startup-fencing="false" \ >> stonith-timeout="430s" >> >> rsc_defaults resource-stickiness="INFINITY" \ >> migration-threshold="1" >> >> ------------------------------------------------------------------------- >> >> よろしくお願い致します。 > _______________________________________________ > Linux-ha-japan mailing list > Linux-ha-japan [at] lists > http://lists.sourceforge.jp/mailman/listinfo/linux-ha-japan _______________________________________________ Linux-ha-japan mailing list Linux-ha-japan [at] lists http://lists.sourceforge.jp/mailman/listinfo/linux-ha-japan
|