※リポジトリは、https://github.com/linux-ha-japan/pm_logconv-hb-1.0 へ移行しました。
Pacemaker 対応ログメッセージ変換機能。
Heartbeat-2.1.4 用 hb-logconv(*) のPacemaker1.0 + Heartbeat スタック対応版。
(*) http://sourceforge.jp/projects/linux-ha/releases/?package_id=10282
修訂 | 60b5d6bd626272998f0fdb565433afb887fea4a7 (tree) |
---|---|
時間 | 2012-02-09 09:28:54 |
作者 | Yoshihiko SATO <satoyoshi@inte...> |
Committer | Yoshihiko SATO |
Support for Pacemaker-1.0.12, and improvements to the fail-over judgment processing.
@@ -2,7 +2,7 @@ | ||
2 | 2 | # Process this file with autoconf to produce a configure script. |
3 | 3 | |
4 | 4 | AC_PREREQ([2.59]) |
5 | -AC_INIT([pm_logconv-hb], [1.1]) | |
5 | +AC_INIT([pm_logconv-hb], [1.2]) | |
6 | 6 | AM_INIT_AUTOMAKE |
7 | 7 | AC_PREFIX_DEFAULT(/usr) |
8 | 8 | PM_PKG="pacemaker" |
@@ -51,17 +51,6 @@ | ||
51 | 51 | AC_SUBST(HA_NOARCHDATAHBDIR) |
52 | 52 | AC_PROG_LN_S |
53 | 53 | |
54 | -grep "release 6" /etc/redhat-release -q | |
55 | -if test $? = 0; then | |
56 | - rhelver=6 | |
57 | -else | |
58 | - rhelver=5 | |
59 | -fi | |
60 | -RHEL_VER="$rhelver" | |
61 | -AC_SUBST(RHEL_VER) | |
62 | - | |
63 | -AC_CONFIG_FILES(Makefile \ | |
64 | - pm_logconv.spec | |
65 | -) | |
54 | +AC_CONFIG_FILES([Makefile]) | |
66 | 55 | AC_OUTPUT |
67 | 56 |
@@ -9,6 +9,7 @@ | ||
9 | 9 | [Settings] |
10 | 10 | #ha_log_path = /var/log/ha-log |
11 | 11 | #output_path = /var/log/pm_logconv.out |
12 | +#hacf_path = /etc/ha.d/ha.cf | |
12 | 13 | #hostcache_path = /var/lib/heartbeat/hostcache |
13 | 14 | #syslogformat = True |
14 | 15 | #reset_interval = 60 |
@@ -81,6 +82,11 @@ | ||
81 | 82 | pattern_active=crmd,notice:,crmd_ha_status_callback:,Status update:,Node,now has status,active |
82 | 83 | func=node_status_updated |
83 | 84 | |
85 | +#Msg No.6-3 | |
86 | +[Node status determined] | |
87 | +pattern=pengine,determine_online_status,Node,is | |
88 | +func=node_status_determined | |
89 | + | |
84 | 90 | ### |
85 | 91 | # For Interconnect-LAN status event |
86 | 92 | # and Network status event (detected by pingd). |
@@ -157,7 +163,7 @@ | ||
157 | 163 | ## |
158 | 164 | #MsgNo.F0-1, F9-1, F10-1 |
159 | 165 | [Detect calculation starts] |
160 | -pattern=crmd,info:,do_state_transition:,State transition,-> S_POLICY_ENGINE,!I_SHUTDOWN | |
166 | +pattern=crmd,info:,do_state_transition:,State transition,S_IDLE -> S_POLICY_ENGINE,!I_SHUTDOWN | |
161 | 167 | func=detect_pe_calc |
162 | 168 | loglevel=WARN |
163 | 169 |
@@ -190,22 +196,16 @@ | ||
190 | 196 | #MsgNo.F11-2 |
191 | 197 | #The message is not output immediately, output when F/O is complete. |
192 | 198 | [Add Resource stop action] |
193 | -pattern=pengine,notice:,LogActions: Stop,resource | |
199 | +pattern=pengine,notice:,LogActions: Stop | |
194 | 200 | func=add_rsc_stop |
195 | 201 | |
196 | 202 | #MsgNo.F11-3, F11-8, F11-9 |
197 | 203 | #The message is not output immediately, output when F/O is complete. |
198 | 204 | [Add no action] |
199 | -pattern_leave_start=pengine,notice:,LogActions: Leave,resource | |
200 | -pattern_restart=pengine,notice:,LogActions: Restart,resource | |
205 | +pattern_leave_start=pengine,notice:,LogActions: Leave | |
206 | +pattern_restart=pengine,notice:,LogActions: Restart | |
201 | 207 | func=add_no_action |
202 | 208 | |
203 | -#MsgNo.F11-4 | |
204 | -#The message is not output immediately, output when F/O is complete. | |
205 | -[Resource cannot run anywhere] | |
206 | -pattern=pengine,WARN:,native_color:,Resource,cannot run anywhere | |
207 | -func=detect_cannot_run_anywhere | |
208 | - | |
209 | 209 | #MsgNo.F11-5 |
210 | 210 | #The message is not output immediately, output when F/O is complete. |
211 | 211 | [Detect resource unmanaged] |
@@ -215,9 +215,15 @@ | ||
215 | 215 | #MsgNo.F11-6 |
216 | 216 | #The message is not output immediately, output when F/O is complete. |
217 | 217 | [Add Resource move action] |
218 | -pattern=pengine,notice:,LogActions: Move,resource | |
218 | +pattern=pengine,notice:,LogActions: Move | |
219 | 219 | func=add_rsc_move |
220 | 220 | |
221 | +#Msg No.F11-10 | |
222 | +#The message is not output immediately, output when F/O is complete. | |
223 | +[Resource initiating action] | |
224 | +pattern=crmd,info:,te_rsc_command:,Initiating action,!probe_complete | |
225 | +func=rsc_init_action | |
226 | + | |
221 | 227 | ### |
222 | 228 | # For DC election. |
223 | 229 | ### |
@@ -318,6 +324,11 @@ | ||
318 | 324 | pattern=attrd,info:,attrd_perform_update:,Sent delete,!delete -,!fail-count-,!last-failure-,!probe_complete,!shutdown,!master- |
319 | 325 | func=detect_attr_deleted |
320 | 326 | |
327 | +#Msg No.22-3 | |
328 | +[Detect cib updated] | |
329 | +pattern=crmd,info:,abort_transition_graph:,tag=nvpair,Transient attribute: update | |
330 | +func=detect_cib_updated | |
331 | + | |
321 | 332 | ### |
322 | 333 | # For Heartbeat service starts. |
323 | 334 | ### |
@@ -33,7 +33,7 @@ | ||
33 | 33 | # |
34 | 34 | # version number of pm_logconv. |
35 | 35 | # |
36 | -VERSION = "1.1" | |
36 | +VERSION = "1.2" | |
37 | 37 | |
38 | 38 | # |
39 | 39 | # system's host name. |
@@ -111,11 +111,6 @@ | ||
111 | 111 | CMD_CRM_ATTR = "crm_attribute" |
112 | 112 | |
113 | 113 | # |
114 | -# command name for getting current node status of the cluster. | |
115 | -# | |
116 | -CMD_CRM_NODE = "crm_node" | |
117 | - | |
118 | -# | |
119 | 114 | # command name for getting DC node status. |
120 | 115 | # |
121 | 116 | CMD_CRMADMIN = "crmadmin" |
@@ -403,7 +398,7 @@ | ||
403 | 398 | if pid.isdigit() and int(pid) != os.getpid(): |
404 | 399 | return self.is_running(int(pid), cmdline) |
405 | 400 | else: |
406 | - pm_log.warn("PIDFile.read: PID file is screwed up.") | |
401 | + pm_log.info("PIDFile.read: PID file is screwed up.") | |
407 | 402 | return self.FILE_INVALID |
408 | 403 | else: |
409 | 404 | pm_log.info("PIDFile.read: PID file doesn't exist.") |
@@ -474,7 +469,8 @@ | ||
474 | 469 | self.ACTRSC_MOVE = False |
475 | 470 | self.IN_FO_PROCESS = False |
476 | 471 | self.timedoutRscopSet = set() |
477 | - self.shutNodeSet = set() | |
472 | + self.attrDict = dict() | |
473 | + self.nodeDict = dict() | |
478 | 474 | |
479 | 475 | cstat = ConvertStatus() |
480 | 476 |
@@ -501,15 +497,16 @@ | ||
501 | 497 | cstat.ACTRSC_MOVE = c.ACTRSC_MOVE |
502 | 498 | cstat.IN_FO_PROCESS = c.IN_FO_PROCESS |
503 | 499 | cstat.timedoutRscopSet = c.timedoutRscopSet |
504 | - cstat.shutNodeSet = c.shutNodeSet | |
500 | + cstat.attrDict = c.attrDict | |
501 | + cstat.nodeDict = c.nodeDict | |
505 | 502 | else: |
506 | 503 | pm_log.info("StatusFile.read: status file doesn't exist.") |
507 | 504 | self.clear_cstat() |
508 | 505 | pm_log.debug("StatusFile.read: [%d:%d], FAIL[%s], IN_CALC[%s], "\ |
509 | - "RSC_MOVE[%s], IN_FO[%s], Rscop%s, Node%s" % | |
506 | + "RSC_MOVE[%s], IN_FO[%s], Rscop%s, attrDict%s, nodeDict%s" % | |
510 | 507 | (cstat.ino, cstat.offset, cstat.FAILURE_OCCURRED, |
511 | 508 | cstat.IN_CALC, cstat.ACTRSC_MOVE, cstat.IN_FO_PROCESS, |
512 | - list(cstat.timedoutRscopSet), list(cstat.shutNodeSet))) | |
509 | + list(cstat.timedoutRscopSet), dict(cstat.attrDict), dict(cstat.nodeDict))) | |
513 | 510 | return True |
514 | 511 | except Exception, strerror: |
515 | 512 | pm_log.error("StatusFile.read: I/O error occurred.") |
@@ -538,10 +535,10 @@ | ||
538 | 535 | os.ftruncate(f, l) |
539 | 536 | os.close(f) |
540 | 537 | pm_log.debug("StatusFile.write: [%d:%d], FAIL[%s], IN_CALC[%s], "\ |
541 | - "RSC_MOVE[%s], IN_FO[%s], Rscop%s, Node%s" % | |
538 | + "RSC_MOVE[%s], IN_FO[%s], Rscop%s, attrDict%s, nodeDict%s" % | |
542 | 539 | (cstat.ino, cstat.offset, cstat.FAILURE_OCCURRED, |
543 | 540 | cstat.IN_CALC, cstat.ACTRSC_MOVE, cstat.IN_FO_PROCESS, |
544 | - list(cstat.timedoutRscopSet), list(cstat.shutNodeSet))) | |
541 | + list(cstat.timedoutRscopSet), dict(cstat.attrDict), dict(cstat.nodeDict))) | |
545 | 542 | return True |
546 | 543 | except Exception, strerror: |
547 | 544 | pm_log.error("StatusFile.write: I/O error occurred.") |
@@ -993,9 +990,9 @@ | ||
993 | 990 | psr.add_option("-s", action="store_true", dest="ask_status", |
994 | 991 | default=False, help="return pm_logconv status") |
995 | 992 | psr.add_option("-c", action="store_true", dest="is_continue", |
996 | - default=False, help="start with a continuous mode (\"-p\" option is mutually exclusive)") | |
993 | + default=False, help="start with a continuous mode (\"-p\" option is mutually exclusive)") | |
997 | 994 | psr.add_option("-p", action="store_true", dest="is_present", |
998 | - default=False, help="start with a present mode (\"-c\" option is mutually exclusive)") | |
995 | + default=False, help="start with a present mode (\"-c\" option is mutually exclusive)") | |
999 | 996 | psr.add_option("-f", dest="config_file", default=CONFIGFILE, |
1000 | 997 | help="the specified configuration file is used") |
1001 | 998 | psr.add_option("-v", "--version", action="callback", callback=print_version, |
@@ -1280,8 +1277,8 @@ | ||
1280 | 1277 | ''' |
1281 | 1278 | Check DC node is idle or not with crmadmin command. |
1282 | 1279 | When DC is idle, crmadmin returns "S_IDLE" status. |
1283 | - return: True -> DC is idle. | |
1284 | - False -> DC is not idle. | |
1280 | + return: True -> local is idle. | |
1281 | + False -> local is not idle or not DC. | |
1285 | 1282 | None -> error occurs. |
1286 | 1283 | cannot execute command or maybe during DC election. |
1287 | 1284 | ''' |
@@ -1290,36 +1287,13 @@ | ||
1290 | 1287 | # crmadmin command's default value is 30sec. |
1291 | 1288 | TIMEOUT = 30 * 1000 |
1292 | 1289 | |
1293 | - # Heartbeat status check | |
1294 | - if self.funcs.is_heartbeat() != True: | |
1295 | - return False | |
1296 | - | |
1297 | - # Get DC node name. | |
1298 | - options = ("-D -t %s" % (TIMEOUT)) | |
1290 | + # Get DC status. | |
1291 | + options = ("-S %s -t %s" % (HOSTNAME, TIMEOUT)) | |
1299 | 1292 | (status, output) = \ |
1300 | 1293 | self.funcs.exec_outside_cmd(CMD_CRMADMIN, options, False) |
1301 | 1294 | if status == None: |
1302 | 1295 | # Failed to exec command. |
1303 | - pm_log.warn("is_idle(): failed to get DC node name.") | |
1304 | - return None | |
1305 | - if status != 0: | |
1306 | - # Maybe during DC election. | |
1307 | - return False | |
1308 | - try: | |
1309 | - dcnode = output.split()[-1] | |
1310 | - except: | |
1311 | - # Failed to parse output strings. | |
1312 | - pm_log.warn("is_idle(): failed to parse output strings." + | |
1313 | - "(DC node name)") | |
1314 | - return None | |
1315 | - | |
1316 | - # Get DC status. | |
1317 | - options = ("-S %s -t %s" % (dcnode, TIMEOUT)) | |
1318 | - (status, output) = \ | |
1319 | - self.funcs.exec_outside_cmd(CMD_CRMADMIN, options, False) | |
1320 | - if status == None: | |
1321 | - # Failed to exec command. | |
1322 | - pm_log.warn("is_idle(): failed to get DC node status.") | |
1296 | + pm_log.warn("is_idle(): failed to get local node status.") | |
1323 | 1297 | return None |
1324 | 1298 | if status != 0: |
1325 | 1299 | # Maybe during DC election. |
@@ -1329,9 +1303,9 @@ | ||
1329 | 1303 | except: |
1330 | 1304 | # Failed to parse output strings. |
1331 | 1305 | pm_log.warn("is_idle(): failed to parse output strings." + |
1332 | - "DC node status") | |
1306 | + "local node status") | |
1333 | 1307 | return None |
1334 | - if dcstat == "S_IDLE": | |
1308 | + if dcstat == "S_IDLE" or dcstat == "S_NOT_DC": | |
1335 | 1309 | return True |
1336 | 1310 | return False |
1337 | 1311 |
@@ -1451,16 +1425,20 @@ | ||
1451 | 1425 | # FailOver pattern |
1452 | 1426 | # resource failer + resource move |
1453 | 1427 | # score failer + resource move |
1428 | + # node failer + resource move | |
1454 | 1429 | # node failer + resource start |
1455 | 1430 | # resource failer + resource stop |
1456 | 1431 | # score failer + resource stop |
1432 | + # node failer + resource stop | |
1457 | 1433 | # node failer + resource stopped |
1458 | 1434 | if \ |
1459 | 1435 | (cstat.FAILURE_OCCURRED == FAIL_RSC and cstat.ACTRSC_MOVE == FAIL_MOVE) or \ |
1460 | 1436 | (cstat.FAILURE_OCCURRED == FAIL_SCORE and cstat.ACTRSC_MOVE == FAIL_MOVE) or \ |
1437 | + (cstat.FAILURE_OCCURRED == FAIL_NODE and cstat.ACTRSC_MOVE == FAIL_MOVE) or \ | |
1461 | 1438 | (cstat.FAILURE_OCCURRED == FAIL_NODE and cstat.ACTRSC_MOVE == FAIL_STR) or \ |
1462 | 1439 | (cstat.FAILURE_OCCURRED == FAIL_RSC and cstat.ACTRSC_MOVE == FAIL_STP) or \ |
1463 | 1440 | (cstat.FAILURE_OCCURRED == FAIL_SCORE and cstat.ACTRSC_MOVE == FAIL_STP) or \ |
1441 | + (cstat.FAILURE_OCCURRED == FAIL_NODE and cstat.ACTRSC_MOVE == FAIL_STP) or \ | |
1464 | 1442 | (cstat.FAILURE_OCCURRED == FAIL_NODE and cstat.ACTRSC_MOVE == FAIL_STPD): |
1465 | 1443 | self.funcs.detect_fo_start(outputobj) |
1466 | 1444 | if lconvfrm.ignoremsg: |
@@ -1823,18 +1801,17 @@ | ||
1823 | 1801 | class RscStat: |
1824 | 1802 | ''' |
1825 | 1803 | rscid : resource id. |
1826 | - status : [Started on node|Stopped] | |
1827 | - fofailed : True -> F/O failed. ("cannot run anywhere" appeared.) | |
1828 | - False -> "cannot run anywhere" didn't appear. | |
1804 | + status : [Started on node|Stopped|Move node -> node] | |
1829 | 1805 | unmanaged: True -> resource is unmanaged. |
1830 | 1806 | False -> resource is managed. |
1807 | + operated : True -> resource is operated. | |
1808 | + False -> resource isn't operated. | |
1831 | 1809 | ''' |
1832 | - def __init__(self, rscid=None, status=None, fofailed=False, | |
1833 | - unmanaged=False): | |
1810 | + def __init__(self, rscid=None, status=None, unmanaged=False, operated=False): | |
1834 | 1811 | self.rscid = rscid |
1835 | 1812 | self.status = status |
1836 | - self.fofailed = fofailed | |
1837 | 1813 | self.unmanaged = unmanaged |
1814 | + self.operated = operated | |
1838 | 1815 | |
1839 | 1816 | ''' operator eq ''' |
1840 | 1817 | def __eq__(self,other): |
@@ -1844,20 +1821,16 @@ | ||
1844 | 1821 | def replace(self,new): |
1845 | 1822 | if new.status: |
1846 | 1823 | self.status = new.status |
1847 | - if new.fofailed: | |
1848 | - self.fofailed = new.fofailed | |
1849 | 1824 | if new.unmanaged: |
1850 | 1825 | self.unmanaged = new.unmanaged |
1826 | + if new.operated: | |
1827 | + self.operated = new.operated | |
1851 | 1828 | |
1852 | 1829 | ''' |
1853 | 1830 | Only for debug. |
1854 | 1831 | ''' |
1855 | 1832 | def print_rscstat(self): |
1856 | - print "rsc:%s\tstatus:%s\tfofailed:%s\tunmanaged:%s\t" % (self.rscid,self.status,self.fofailed,self.unmanaged) | |
1857 | -# print self.rscid | |
1858 | -# print self.status | |
1859 | -# print self.fofailed | |
1860 | -# print self.unmanaged | |
1833 | + print "rsc:%s\tstatus:%s\tunmanaged:%s\toperated:%s" % (self.rscid,self.status,self.unmanaged,self.operated) | |
1861 | 1834 | |
1862 | 1835 | ''' |
1863 | 1836 | Return codes for functions to convert log. |
@@ -2061,33 +2034,43 @@ | ||
2061 | 2034 | None -> error occurs or attribute doesn't exist. |
2062 | 2035 | ''' |
2063 | 2036 | def check_attribute(self, attrname, op, attrval, node): |
2064 | - | |
2065 | - # Execute command. | |
2066 | - options = ("-G -U %s -t status -n %s" % (node, attrname)) | |
2067 | - (status, output) = \ | |
2068 | - self.exec_outside_cmd(CMD_CRM_ATTR, options, False) | |
2069 | - if status == None: | |
2070 | - # Failed to exec command, or | |
2071 | - # The node is dead, or | |
2072 | - # Specified attribute doesn't exist. | |
2073 | - pm_log.warn("check_attribute(): " + | |
2074 | - "failed to get %s's value." % (attrname)) | |
2037 | + if not (node, attrname) in cstat.attrDict: | |
2075 | 2038 | return None, None |
2076 | - | |
2077 | - pm_log.debug("check_attribute(): " + | |
2078 | - "%s's status[%s] output[%s] node[%s] attr[%s]" % | |
2079 | - (CMD_CRM_ATTR, status, output, node, attrname)) | |
2080 | - | |
2081 | - if status != 0: | |
2082 | - # crm_attribute returns error value. | |
2083 | - # Maybe local node is shutting down. | |
2084 | - return None, None | |
2085 | - # In normal case, crm_attribute command shows like the following. | |
2086 | - # " name=default_ping_set value=100" | |
2087 | - # So parse it to get current attribute value. | |
2088 | - try: | |
2039 | + # Get attribute value from log. | |
2040 | + currentval = cstat.attrDict[node, attrname] | |
2041 | + | |
2042 | + if currentval == None: | |
2043 | + # Get attribute value from command. | |
2044 | + # Execute command. | |
2045 | + options = ("-G -U %s -t status -n %s" % (node, attrname)) | |
2046 | + (status, output) = self.exec_outside_cmd(CMD_CRM_ATTR, options, False) | |
2047 | + if status == None: | |
2048 | + # Failed to exec command, or | |
2049 | + # The node is dead, or | |
2050 | + # Specified attribute doesn't exist. | |
2051 | + pm_log.warn("check_attribute(): " + | |
2052 | + "failed to get %s's value." % (attrname)) | |
2053 | + return None, None | |
2054 | + | |
2055 | + pm_log.debug("check_attribute(): " + | |
2056 | + "%s's status[%s] output[%s] node[%s] attr[%s]" % | |
2057 | + (CMD_CRM_ATTR, status, output, node, attrname)) | |
2058 | + | |
2059 | + if status != 0: | |
2060 | + # crm_attribute returns error value. | |
2061 | + # Maybe local node is shutting down. | |
2062 | + return None, None | |
2063 | + # In normal case, crm_attribute command shows like the following. | |
2064 | + # " name=default_ping_set value=100" | |
2065 | + # So parse it to get current attribute value. | |
2089 | 2066 | valuepos = output.index('value=') |
2090 | 2067 | currentval = output[valuepos + len('value='):].strip() |
2068 | + else: | |
2069 | + pm_log.debug("check_attribute(): " + | |
2070 | + "log's node[%s] attr[%s] value[%s]" % | |
2071 | + (node, attrname, currentval)) | |
2072 | + | |
2073 | + try: | |
2091 | 2074 | if currentval.isdigit() and attrval.isdigit(): |
2092 | 2075 | result = getattr(operator, op)(int(currentval),int(attrval)) |
2093 | 2076 | else: |
@@ -2101,7 +2084,7 @@ | ||
2101 | 2084 | return result, currentval |
2102 | 2085 | |
2103 | 2086 | ''' |
2104 | - Compare attribute value with it that acquired from CIB. | |
2087 | + Compare attribute value with it that acquired from CIB or log message. | |
2105 | 2088 | Operations to compare is [lt|gt|le|ge|eq|ne]. |
2106 | 2089 | arg1 : list of target attributes ([[name, op, value], bool_op, [...] ...]) |
2107 | 2090 | arg2 : node name which has the attribute. |
@@ -2125,28 +2108,41 @@ | ||
2125 | 2108 | for i,rule in [(i,x) for (i,x) in enumerate(rules) if i % 2 == 0]: |
2126 | 2109 | if not rule[0] or rule[0] in attrs: |
2127 | 2110 | continue |
2128 | - # Execute command. | |
2129 | - opts = ("-G -U %s -t status -n %s"%(node, rule[0])) | |
2130 | - (status, output) = self.exec_outside_cmd(CMD_CRM_ATTR, opts, False) | |
2131 | - if status == None: | |
2132 | - # Failed to exec command, or | |
2133 | - # The node is dead, or | |
2134 | - # Specified attribute doesn't exist. | |
2135 | - pm_log.warn("check_attributes(): " | |
2136 | - "failed to get %s's value."%(rule[0])) | |
2137 | - return None | |
2138 | - pm_log.debug("check_attributes(): " | |
2139 | - "%s's status[%s] output[%s] node[%s] attr[%s]" | |
2140 | - %(CMD_CRM_ATTR, status, output, node, rule[0])) | |
2141 | - | |
2142 | - if status != 0: | |
2143 | - # crm_attribute returns error value. | |
2144 | - # Maybe local node is shutting down. | |
2145 | - return None | |
2146 | - # In normal case, crm_attribute command shows like the following. | |
2147 | - # "name=default_ping_set value=100" | |
2148 | - # So parse it to get current attribute value. | |
2149 | - attrs[rule[0]] = output[output.index('value=')+len('value='):].strip() | |
2111 | + if (node, rule[0]) in cstat.attrDict: | |
2112 | + # Get attribute value from log. | |
2113 | + attrs[rule[0]] = cstat.attrDict[node, rule[0]] | |
2114 | + | |
2115 | + if attrs[rule[0]] == None: | |
2116 | + # Get attribute value from command. | |
2117 | + # Execute command. | |
2118 | + opts = ("-G -U %s -t status -n %s"%(node, rule[0])) | |
2119 | + (status, output) = self.exec_outside_cmd(CMD_CRM_ATTR, opts, False) | |
2120 | + if status == None: | |
2121 | + # Failed to exec command, or | |
2122 | + # The node is dead, or | |
2123 | + # Specified attribute doesn't exist. | |
2124 | + pm_log.warn("check_attributes(): " | |
2125 | + "failed to get %s's value."%(rule[0])) | |
2126 | + return None | |
2127 | + pm_log.debug("check_attributes(): " | |
2128 | + "%s's status[%s] output[%s] node[%s] attr[%s]" | |
2129 | + %(CMD_CRM_ATTR, status, output, node, rule[0])) | |
2130 | + | |
2131 | + if status != 0: | |
2132 | + # crm_attribute returns error value. | |
2133 | + # Maybe local node is shutting down. | |
2134 | + return None | |
2135 | + # In normal case, crm_attribute command shows like the following. | |
2136 | + # "name=default_ping_set value=100" | |
2137 | + # So parse it to get current attribute value. | |
2138 | + attrs[rule[0]] = output[output.index('value=')+len('value='):].strip() | |
2139 | + else: | |
2140 | + pm_log.debug("check_attribute(): " + | |
2141 | + "log's node[%s] attr[%s] value[%s]" % | |
2142 | + (node, rule[0], attrs[rule[0]])) | |
2143 | + | |
2144 | + if len(attrs) < 1: | |
2145 | + return None | |
2150 | 2146 | pm_log.debug("check_attributes(): attrs%s"%(attrs)) |
2151 | 2147 | |
2152 | 2148 | # phase1: Operate each condition of the attribute. |
@@ -2250,41 +2246,41 @@ | ||
2250 | 2246 | return False |
2251 | 2247 | |
2252 | 2248 | ''' |
2253 | - Get online node from command. | |
2254 | - return : active node in the cluster. | |
2255 | - None -> error occurs. | |
2249 | + Check unmatched attribute exists or not. | |
2250 | + Compared with the attribute's rule of configure file. | |
2251 | + return : True -> unmatched attribute exists. | |
2252 | + False -> unmatched attribute not exists. | |
2256 | 2253 | ''' |
2257 | - def get_onlinenode(self): | |
2258 | - onlineset = set() | |
2259 | - ret = self.is_heartbeat() | |
2260 | - if ret == None: | |
2261 | - return ret | |
2262 | - elif ret == False: | |
2263 | - return onlineset | |
2264 | - options = ("-p") | |
2265 | - (status, nodelist) = self.exec_outside_cmd(CMD_CRM_NODE, options, False) | |
2266 | - if status == None: | |
2267 | - # Failed to exec command. | |
2268 | - pm_log.warn("get_onlinenode(): failed to get active nodelist.") | |
2269 | - return None | |
2270 | - | |
2271 | - for nodename in nodelist.split(): | |
2272 | - options = ("-N %s -n standby -G -l forever -d off" % (nodename)) | |
2273 | - (status, output) = self.exec_outside_cmd(CMD_CRM_ATTR, options, False) | |
2274 | - if status == None: | |
2275 | - # Failed to exec command. | |
2276 | - pm_log.warn("get_onlinenode(): failed to get online nodelist.") | |
2277 | - return None | |
2278 | - try: | |
2279 | - standby = output[output.index("value"):] | |
2280 | - except: | |
2281 | - pm_log.debug("get_onlinenode(): " + | |
2282 | - "failed to parse output strings. [%s]" % (output)) | |
2254 | + def check_unmatch_attr_rule(self): | |
2255 | + for node, stat in cstat.nodeDict.iteritems(): | |
2256 | + if stat != "online": | |
2283 | 2257 | continue |
2284 | - if standby.split("=")[1] == "off": | |
2285 | - onlineset.add(nodename) | |
2286 | - pm_log.debug("get_onlinenode(): node %s is online node." % (list(onlineset))) | |
2287 | - return onlineset | |
2258 | + # Check each attribute's value. | |
2259 | + for attrRule in attrRuleList: | |
2260 | + attrname, op, attrval = tuple(attrRule) | |
2261 | + # Check attribute's value for each node. | |
2262 | + # Now, the node seems to be active. | |
2263 | + result = self.check_attribute(attrname, op, attrval, node)[0] | |
2264 | + pm_log.debug("check_unmatch_attr_rule(): " | |
2265 | + "check_attribute returns [%s]"%(result)) | |
2266 | + if result: | |
2267 | + # attribute's value means "failure(s) occurred"! | |
2268 | + return True | |
2269 | + # [COMMENT] | |
2270 | + # result == False: | |
2271 | + # attribute did not change or | |
2272 | + # it was updated to normal value. | |
2273 | + # result == None: | |
2274 | + # some errors occurred in check_attribute() or | |
2275 | + # the node is not running or | |
2276 | + # specified attribute does not exist. | |
2277 | + for rules in attrRules: | |
2278 | + result = self.check_attributes(rules[:], node) | |
2279 | + pm_log.debug("check_unmatch_attr_rule(): " | |
2280 | + "check_attributes returns [%s]"%(result)) | |
2281 | + if result: | |
2282 | + return True | |
2283 | + return False | |
2288 | 2284 | |
2289 | 2285 | ''' |
2290 | 2286 | Set specified values to RscStat object list. |
@@ -2293,13 +2289,13 @@ | ||
2293 | 2289 | When the arg's value is None, don't update the element's value. |
2294 | 2290 | |
2295 | 2291 | arg1 : resource id. |
2296 | - arg2 : the rsc's status. [Started on node|Stopped] | |
2297 | - arg3 : the rsc's F/O failed or not. (depends on "cannot run anywhere") | |
2298 | - arg4 : the rsc is managed or not. | |
2292 | + arg2 : the rsc's status. [Started on node|Stopped|Move node -> node] | |
2293 | + arg3 : the rsc is managed or not. | |
2294 | + arg4 : the rsc is operated or not. | |
2299 | 2295 | return Nothing. |
2300 | 2296 | ''' |
2301 | - def set_rscstat(self, rscid, statstr, fofailed, unmanaged): | |
2302 | - newrsc = RscStat(rscid,statstr,fofailed,unmanaged) | |
2297 | + def set_rscstat(self, rscid, statstr, unmanaged, operated): | |
2298 | + newrsc = RscStat(rscid, statstr, unmanaged, operated) | |
2303 | 2299 | if newrsc in self.rscstatList: |
2304 | 2300 | idx = self.rscstatList.index(newrsc) |
2305 | 2301 | self.rscstatList[idx].replace(newrsc) |
@@ -2311,10 +2307,10 @@ | ||
2311 | 2307 | ''' |
2312 | 2308 | def debug_status(self): |
2313 | 2309 | pm_log.debug("debug_status(): FAIL[%s], IN_CALC[%s], "\ |
2314 | - "RSC_MOVE[%s], IN_FO[%s], Rscop%s, Node%s" % | |
2310 | + "RSC_MOVE[%s], IN_FO[%s], Rscop%s, attrDict%s, nodeDict%s" % | |
2315 | 2311 | (cstat.FAILURE_OCCURRED, cstat.IN_CALC, |
2316 | 2312 | cstat.ACTRSC_MOVE, cstat.IN_FO_PROCESS, |
2317 | - list(cstat.timedoutRscopSet), list(cstat.shutNodeSet))) | |
2313 | + list(cstat.timedoutRscopSet), dict(cstat.attrDict), dict(cstat.nodeDict))) | |
2318 | 2314 | |
2319 | 2315 | ''' |
2320 | 2316 | Clear ConvertStatus (exclude ino and offset). |
@@ -2328,18 +2324,23 @@ | ||
2328 | 2324 | cstat.ACTRSC_MOVE = False |
2329 | 2325 | cstat.IN_FO_PROCESS = False |
2330 | 2326 | cstat.timedoutRscopSet = set() |
2331 | - cstat.shutNodeSet = set() | |
2327 | + cstat.attrDict = dict() | |
2328 | + cstat.nodeDict = dict() | |
2332 | 2329 | self.debug_status() |
2333 | 2330 | |
2334 | 2331 | ''' |
2335 | - Clear ConvertStatus (exclude shutNodeSet, ino and offset). | |
2332 | + Clear ConvertStatus (exclude nodeDict, ino and offset). | |
2336 | 2333 | ''' |
2337 | - def clear_status_except_shutnode(self): | |
2338 | - pm_log.debug("clear_status_except_shutnode():" + | |
2339 | - "clear convert status (exclude shutNodeSet, ino and offset).") | |
2340 | - tmp_shutNodeSet = cstat.shutNodeSet | |
2341 | - self.clear_status() | |
2342 | - cstat.shutNodeSet = tmp_shutNodeSet | |
2334 | + def clear_status_except_node(self): | |
2335 | + pm_log.debug("clear_status_except_node():" + | |
2336 | + "clear convert status (exclude nodeDict, ino and offset).") | |
2337 | + self.debug_status() | |
2338 | + cstat.FAILURE_OCCURRED = False | |
2339 | + cstat.IN_CALC = False | |
2340 | + cstat.ACTRSC_MOVE = False | |
2341 | + cstat.IN_FO_PROCESS = False | |
2342 | + cstat.timedoutRscopSet = set() | |
2343 | + cstat.attrDict = dict() | |
2343 | 2344 | self.debug_status() |
2344 | 2345 | |
2345 | 2346 | ########## |
@@ -2597,14 +2598,35 @@ | ||
2597 | 2598 | output_loglevel = self.LOG_WARN_LV |
2598 | 2599 | status = "lost" |
2599 | 2600 | elif status == "active": |
2600 | - if nodename in cstat.shutNodeSet: | |
2601 | - cstat.shutNodeSet.discard(nodename) | |
2602 | 2601 | status = "member" |
2602 | + cstat.nodeDict[nodename] = status | |
2603 | 2603 | |
2604 | 2604 | convertedlog = ("Node %s is %s." % (nodename, status)) |
2605 | 2605 | outputobj.output_log(output_loglevel, convertedlog) |
2606 | 2606 | return CONV_OK |
2607 | 2607 | |
2608 | + ''' | |
2609 | + Determine Node status. | |
2610 | + So it outputs nothing. | |
2611 | + | |
2612 | + MsgNo. 6-3) | |
2613 | + Jun 14 15:04:03 x3650a pengine: [3748]: info: determine_online_status: Node x3650a is shutting down | |
2614 | + Jun 14 15:04:56 x3650a pengine: [21571]: info: determine_online_status: Node x3650a is online | |
2615 | + Jun 14 15:05:42 x3650a pengine: [21571]: info: determine_online_status_fencing: Node x3650b is down | |
2616 | + ''' | |
2617 | + def node_status_determined(self, outputobj, logelm, lconvfrm): | |
2618 | + try: | |
2619 | + nodename = logelm.halogmsg.split()[2] | |
2620 | + nodestat = " ".join(logelm.halogmsg.split()[4:]) | |
2621 | + except: | |
2622 | + return CONV_PARSE_ERROR | |
2623 | + | |
2624 | + if self.is_empty(nodename, nodestat): | |
2625 | + return CONV_ITEM_EMPTY | |
2626 | + | |
2627 | + cstat.nodeDict[nodename] = nodestat | |
2628 | + return CONV_OK | |
2629 | + | |
2608 | 2630 | ########## |
2609 | 2631 | # For Interconnect-LAN status event and |
2610 | 2632 | # Network status event (detected by pingd). |
@@ -2844,7 +2866,6 @@ | ||
2844 | 2866 | if self.is_empty(procname, pgid): |
2845 | 2867 | return CONV_ITEM_EMPTY |
2846 | 2868 | |
2847 | - cstat.shutNodeSet.add(HOSTNAME) | |
2848 | 2869 | convertedlog = ("Stop \"%s\" process normally. (pid=%s)" % (procname, pgid)) |
2849 | 2870 | outputobj.output_log(lconvfrm.loglevel, convertedlog) |
2850 | 2871 | return CONV_OK |
@@ -2906,44 +2927,6 @@ | ||
2906 | 2927 | self.rscstatList = None |
2907 | 2928 | self.rscstatList = list() |
2908 | 2929 | |
2909 | - # If any failure didn't occur and Heartbeat is not in shutdown process, | |
2910 | - # check each attribute's value to decide whether it is F/O or not. | |
2911 | - if cstat.FAILURE_OCCURRED == False: | |
2912 | - nodeset = self.get_onlinenode() | |
2913 | - if nodeset == None: | |
2914 | - return CONV_GETINFO_ERROR | |
2915 | - for node in (nodeset - cstat.shutNodeSet): | |
2916 | - # Check each attribute's value. | |
2917 | - for attrRule in attrRuleList: | |
2918 | - attrname, op, attrval = tuple(attrRule) | |
2919 | - # Check attribute's value for each node. | |
2920 | - # Now, the node seems to be active. | |
2921 | - result = self.check_attribute(attrname, op, attrval, node)[0] | |
2922 | - pm_log.debug("detect_pe_calc(): " | |
2923 | - "check_attribute returns [%s]"%(result)) | |
2924 | - if result == True: | |
2925 | - # attribute's value means "failure(s) occurred"! | |
2926 | - cstat.FAILURE_OCCURRED = FAIL_SCORE | |
2927 | - if cstat.ACTRSC_MOVE == FAIL_MOVE or \ | |
2928 | - cstat.ACTRSC_MOVE == FAIL_STP: | |
2929 | - self.detect_fo_start(outputobj) | |
2930 | - # [COMMENT] | |
2931 | - # result == False: | |
2932 | - # attribute did not change or | |
2933 | - # it was updated to normal value. | |
2934 | - # result == None: | |
2935 | - # some errors occurred in check_attribute() or | |
2936 | - # the node is not running or | |
2937 | - # specified attribute does not exist. | |
2938 | - for rules in attrRules: | |
2939 | - result = self.check_attributes(rules[:], node) | |
2940 | - pm_log.debug("detect_pe_calc(): " | |
2941 | - "check_attributes returns [%s]"%(result)) | |
2942 | - if result: | |
2943 | - cstat.FAILURE_OCCURRED = FAIL_SCORE | |
2944 | - if (cstat.ACTRSC_MOVE == FAIL_MOVE or | |
2945 | - cstat.ACTRSC_MOVE == FAIL_STP): | |
2946 | - self.detect_fo_start(outputobj) | |
2947 | 2930 | return CONV_OK |
2948 | 2931 | |
2949 | 2932 | ''' |
@@ -2969,9 +2952,9 @@ | ||
2969 | 2952 | break |
2970 | 2953 | |
2971 | 2954 | if cstat.IN_FO_PROCESS == False: |
2972 | - self.clear_status_except_shutnode() | |
2955 | + self.clear_status_except_node() | |
2973 | 2956 | return CONV_OK |
2974 | - self.clear_status_except_shutnode() | |
2957 | + self.clear_status_except_node() | |
2975 | 2958 | |
2976 | 2959 | # When one or more Unmanaged resource exists in the cluster, |
2977 | 2960 | # (even if the resource is not set in act_rsc) |
@@ -2991,15 +2974,16 @@ | ||
2991 | 2974 | detect_fo_failed = False |
2992 | 2975 | for rscstat in self.rscstatList: |
2993 | 2976 | if rscstat.rscid in actRscList: |
2994 | - if rscstat.fofailed or rscstat.status == "Stopped" : | |
2977 | + if rscstat.status == "Stopped" : | |
2995 | 2978 | output_loglevel = self.LOG_ERR_LV |
2996 | 2979 | output_status = ("Stopped") |
2997 | 2980 | detect_fo_failed = True |
2998 | 2981 | else: |
2999 | 2982 | output_loglevel = self.LOG_INFO_LV |
3000 | 2983 | output_status = rscstat.status |
3001 | - convertedlog = ("Resource %s : %s" % (rscstat.rscid, output_status)) | |
3002 | - outputobj.output_log(output_loglevel, convertedlog) | |
2984 | + if not (rscstat.status != "Stopped" and rscstat.operated != True): | |
2985 | + convertedlog = ("Resource %s : %s" % (rscstat.rscid, output_status)) | |
2986 | + outputobj.output_log(output_loglevel, convertedlog) | |
3003 | 2987 | |
3004 | 2988 | if detect_fo_failed: |
3005 | 2989 | outputobj.output_log(self.LOG_ERR_LV, "fail-over failed.") |
@@ -3034,8 +3018,8 @@ | ||
3034 | 3018 | if self.is_empty(nodename): |
3035 | 3019 | return CONV_ITEM_EMPTY |
3036 | 3020 | |
3037 | - if nodename in cstat.shutNodeSet: | |
3038 | - pm_log.debug("The [%s] exists in the shutdown list." % (nodename)) | |
3021 | + if cstat.nodeDict.has_key(nodename) and cstat.nodeDict[nodename] != "online": | |
3022 | + pm_log.debug("The [%s] is not online." % (nodename)) | |
3039 | 3023 | pm_log.debug("Ignore the fotrigger flag setting.") |
3040 | 3024 | return CONV_SHUT_NODE |
3041 | 3025 |
@@ -3047,7 +3031,7 @@ | ||
3047 | 3031 | So it outputs nothing. |
3048 | 3032 | |
3049 | 3033 | MsgNo. F11-1) |
3050 | - Jan 5 15:12:25 x3650a pengine: [16657]: notice: LogActions: Start prmExPostgreSQLDB (x3650a) | |
3034 | + Jan 5 15:12:25 x3650a pengine: [16657]: notice: LogActions: Start prmExPostgreSQLDB (x3650a) | |
3051 | 3035 | ''' |
3052 | 3036 | def add_rsc_start(self, outputobj, logelm, lconvfrm): |
3053 | 3037 | try: |
@@ -3074,7 +3058,7 @@ | ||
3074 | 3058 | This is to get resource status when F/O finished. |
3075 | 3059 | |
3076 | 3060 | MsgNo. F11-2) |
3077 | - Jan 5 15:19:23 x3650a pengine: [17658]: notice: LogActions: Stop resource prmExPostgreSQLDB (x3650a) | |
3061 | + Jan 5 15:19:23 x3650a pengine: [17658]: notice: LogActions: Stop resource prmExPostgreSQLDB (x3650a) | |
3078 | 3062 | ''' |
3079 | 3063 | def add_rsc_stop(self, outputobj, logelm, lconvfrm): |
3080 | 3064 | try: |
@@ -3091,7 +3075,12 @@ | ||
3091 | 3075 | |
3092 | 3076 | if rscid in actRscList: |
3093 | 3077 | cstat.ACTRSC_MOVE = FAIL_STP |
3094 | - if cstat.FAILURE_OCCURRED == FAIL_RSC or cstat.FAILURE_OCCURRED == FAIL_SCORE: | |
3078 | + if cstat.FAILURE_OCCURRED == False and self.check_unmatch_attr_rule() == True: | |
3079 | + cstat.FAILURE_OCCURRED = FAIL_SCORE | |
3080 | + if \ | |
3081 | + cstat.FAILURE_OCCURRED == FAIL_RSC or \ | |
3082 | + cstat.FAILURE_OCCURRED == FAIL_SCORE or \ | |
3083 | + cstat.FAILURE_OCCURRED == FAIL_NODE: | |
3095 | 3084 | self.detect_fo_start(outputobj) |
3096 | 3085 | return CONV_OK |
3097 | 3086 |
@@ -3101,11 +3090,11 @@ | ||
3101 | 3090 | So it outputs nothing. |
3102 | 3091 | |
3103 | 3092 | MsgNo.F11-3) |
3104 | - Jan 5 15:36:42 x3650a pengine: [27135]: notice: LogActions: Leave resource prmFsPostgreSQLDB1 (Started x3650a) | |
3093 | + Jan 5 15:36:42 x3650a pengine: [27135]: notice: LogActions: Leave resource prmFsPostgreSQLDB1 (Started x3650a) | |
3105 | 3094 | MsgNo.F11-8) |
3106 | 3095 | Jan 5 14:50:05 x3650a pengine: [13197]: notice: LogActions: Restart resource prmIpPostgreSQLDB (Started x3650b) |
3107 | 3096 | MsgNo.F11-9) |
3108 | - Jan 5 14:50:41 x3650a pengine: [13197]: notice: LogActions: Leave resource prmPingd:0 (Stopped) | |
3097 | + Jan 5 14:50:41 x3650a pengine: [13197]: notice: LogActions: Leave resource prmPingd:0 (Stopped) | |
3109 | 3098 | ''' |
3110 | 3099 | def add_no_action(self, outputobj, logelm, lconvfrm): |
3111 | 3100 | try: |
@@ -3135,27 +3124,6 @@ | ||
3135 | 3124 | return CONV_OK |
3136 | 3125 | |
3137 | 3126 | ''' |
3138 | - Detect resouce cannot run anywhere. | |
3139 | - This is to get resource status when F/O finished. | |
3140 | - So it outputs nothing. | |
3141 | - | |
3142 | - MsgNo. F11-4) | |
3143 | - Jan 5 15:19:20 x3650a pengine: [17658]: WARN: native_color: Resource prmApPostgreSQLDB cannot run anywhere | |
3144 | - ''' | |
3145 | - def detect_cannot_run_anywhere(self, outputobj, logelm, lconvfrm): | |
3146 | - try: | |
3147 | - wordlist = logelm.halogmsg.split() | |
3148 | - rscid = wordlist[2] | |
3149 | - except: | |
3150 | - return CONV_PARSE_ERROR | |
3151 | - if self.is_empty(rscid): | |
3152 | - return CONV_ITEM_EMPTY | |
3153 | - | |
3154 | - # Set the resource's status to the list. | |
3155 | - self.set_rscstat(rscid, None, True, None) | |
3156 | - return CONV_OK | |
3157 | - | |
3158 | - ''' | |
3159 | 3127 | Detect resouce became unmanaged. |
3160 | 3128 | This is to get resource status when F/O finished. |
3161 | 3129 | So it outputs nothing. |
@@ -3178,7 +3146,7 @@ | ||
3178 | 3146 | return CONV_ITEM_EMPTY |
3179 | 3147 | |
3180 | 3148 | # Set the resource's status to the list. |
3181 | - self.set_rscstat(rscid, None, None, True) | |
3149 | + self.set_rscstat(rscid, None, True, None) | |
3182 | 3150 | return CONV_OK |
3183 | 3151 | |
3184 | 3152 | ''' |
@@ -3186,7 +3154,7 @@ | ||
3186 | 3154 | This is to get resource status when F/O started. |
3187 | 3155 | |
3188 | 3156 | MsgNo. F11-6) |
3189 | - Jan 5 15:12:27 x3650a pengine: [16657]: notice: LogActions: Move resource prmExPostgreSQLDB (Started x3650a -> x3650b) | |
3157 | + Jan 5 15:12:27 x3650a pengine: [16657]: notice: LogActions: Move resource prmExPostgreSQLDB (Started x3650a -> x3650b) | |
3190 | 3158 | ''' |
3191 | 3159 | def add_rsc_move(self, outputobj, logelm, lconvfrm): |
3192 | 3160 | try: |
@@ -3206,8 +3174,40 @@ | ||
3206 | 3174 | |
3207 | 3175 | if rscid in actRscList: |
3208 | 3176 | cstat.ACTRSC_MOVE = FAIL_MOVE |
3209 | - if cstat.FAILURE_OCCURRED == FAIL_RSC or cstat.FAILURE_OCCURRED == FAIL_SCORE: | |
3177 | + if cstat.FAILURE_OCCURRED == False and self.check_unmatch_attr_rule() == True: | |
3178 | + cstat.FAILURE_OCCURRED = FAIL_SCORE | |
3179 | + if \ | |
3180 | + cstat.FAILURE_OCCURRED == FAIL_RSC or \ | |
3181 | + cstat.FAILURE_OCCURRED == FAIL_SCORE or \ | |
3182 | + cstat.FAILURE_OCCURRED == FAIL_NODE: | |
3210 | 3183 | self.detect_fo_start(outputobj) |
3184 | + return CONV_OK | |
3185 | + | |
3186 | + ''' | |
3187 | + Resource initiating action. | |
3188 | + This is to get resource status when F/O finished. | |
3189 | + So it outputs nothing. | |
3190 | + | |
3191 | + MsgNo. F11-10) | |
3192 | + May 27 11:23:50 x3650a crmd: [8108]: info: te_rsc_command: Initiating action 25: start prmExPostgreSQLDB_start_0 on x3650a (local) | |
3193 | + May 27 11:23:50 x3650a crmd: [8108]: info: te_rsc_command: Initiating action 25: start prmExPostgreSQLDB_start_0 on x3650b | |
3194 | + ''' | |
3195 | + def rsc_init_action(self, outputobj, logelm, lconvfrm): | |
3196 | + if cstat.IN_FO_PROCESS == False: | |
3197 | + return CONV_OK | |
3198 | + | |
3199 | + try: | |
3200 | + rscid, op = self.parse_opid(logelm.halogmsg.split()[5])[:2] | |
3201 | + if op == "monitor": | |
3202 | + return CONV_OK | |
3203 | + except: | |
3204 | + return CONV_PARSE_ERROR | |
3205 | + | |
3206 | + if self.is_empty(rscid): | |
3207 | + return CONV_ITEM_EMPTY | |
3208 | + | |
3209 | + if rscid in actRscList: | |
3210 | + self.set_rscstat(rscid, None, None, True) | |
3211 | 3211 | |
3212 | 3212 | return CONV_OK |
3213 | 3213 |
@@ -3268,7 +3268,6 @@ | ||
3268 | 3268 | if self.is_empty(nodename): |
3269 | 3269 | return CONV_ITEM_EMPTY |
3270 | 3270 | |
3271 | - cstat.shutNodeSet.add(nodename) | |
3272 | 3271 | convertedlog = ("Pacemaker on %s is shutting down." % (nodename)) |
3273 | 3272 | outputobj.output_log(lconvfrm.loglevel, convertedlog) |
3274 | 3273 | return CONV_OK |
@@ -3284,7 +3283,7 @@ | ||
3284 | 3283 | ''' |
3285 | 3284 | def detect_hb_shutdown(self, outputobj, logelm, lconvfrm): |
3286 | 3285 | outputobj.output_log(lconvfrm.loglevel, lconvfrm.rulename) |
3287 | - cstat.shutNodeSet.clear() | |
3286 | + cstat.nodeDict.clear() | |
3288 | 3287 | return CONV_OK |
3289 | 3288 | |
3290 | 3289 | ''' |
@@ -3297,7 +3296,6 @@ | ||
3297 | 3296 | Jan 18 10:36:18 x3650a crmd: [12294]: info: crm_shutdown: Requesting shutdown |
3298 | 3297 | ''' |
3299 | 3298 | def detect_pcmk_shutting_down(self, outputobj, logelm, lconvfrm): |
3300 | - cstat.shutNodeSet.add(HOSTNAME) | |
3301 | 3299 | outputobj.output_log(lconvfrm.loglevel, lconvfrm.rulename) |
3302 | 3300 | return CONV_OK |
3303 | 3301 |
@@ -3316,7 +3314,7 @@ | ||
3316 | 3314 | if self.is_empty(nodename): |
3317 | 3315 | return CONV_ITEM_EMPTY |
3318 | 3316 | |
3319 | - cstat.shutNodeSet.add(nodename) | |
3317 | + cstat.nodeDict[nodename] = "shutting down" | |
3320 | 3318 | return CONV_OK |
3321 | 3319 | |
3322 | 3320 | ########## |
@@ -3491,11 +3489,44 @@ | ||
3491 | 3489 | outputobj.output_log(lconvfrm.loglevel, convertedlog) |
3492 | 3490 | return CONV_OK |
3493 | 3491 | |
3492 | + ''' | |
3493 | + Detect cib updated or added. | |
3494 | + | |
3495 | + MsgNo. 22-3) | |
3496 | + Jul 8 11:30:24 x3650a crmd: [4118]: info: abort_transition_graph: \ | |
3497 | + te_update_diff:150 - Triggered transition abort \ | |
3498 | + (complete=1, tag=nvpair, id=status-f8d52aae-518b-4b06-b1a1-b23486f8b410-default_ping_set, name=NA, value=100, magic=NA, cib=0.10.47) \ | |
3499 | + : Transient attribute: update | |
3500 | + Jul 8 11:30:24 x3650a crmd: [4118]: info: abort_transition_graph: \ | |
3501 | + te_update_diff:150 - Triggered transition abort \ | |
3502 | + (complete=1, tag=nvpair, id=status-f8d52aae-518b-4b06-b1a1-b23486f8b410-default_ping_set, magic=NA, cib=0.10.47) \ | |
3503 | + : Transient attribute: update | |
3504 | + ''' | |
3505 | + def detect_cib_updated(self, outputobj, logelm, lconvfrm): | |
3506 | + try: | |
3507 | + attrval=None | |
3508 | + for word in logelm.halogmsg.split(", "): | |
3509 | + if word.startswith("id="): | |
3510 | + uuid_attrname = word.split("=")[1].split("-", 1)[1] | |
3511 | + nodeuuid = uuid_attrname[0:36] | |
3512 | + nodename = self.get_nodename(nodeuuid) | |
3513 | + attrname = uuid_attrname.replace(nodeuuid + "-" , "") | |
3514 | + elif word.startswith("value="): | |
3515 | + attrval = word.split("=")[1] | |
3516 | + except: | |
3517 | + return CONV_PARSE_ERROR | |
3518 | + | |
3519 | + if self.is_empty(nodename, attrname, attrval): | |
3520 | + return CONV_ITEM_EMPTY | |
3521 | + | |
3522 | + cstat.attrDict[nodename, attrname] = attrval | |
3523 | + return CONV_OK | |
3524 | + | |
3494 | 3525 | ########## |
3495 | 3526 | # For Heartbeat service starts. |
3496 | 3527 | ########## |
3497 | 3528 | ''' |
3498 | - Heartbeat log message which means Heartbeat service is starting. | |
3529 | + Convert log message which means Heartbeat service is starting. | |
3499 | 3530 | |
3500 | 3531 | MsgNo.23-1) |
3501 | 3532 | Jul 15 15:50:31 x3650a heartbeat: [22780]: info: Configuration validated. Starting heartbeat 3.0.3 |
@@ -0,0 +1,93 @@ | ||
1 | +######################################## | |
2 | +# Derived definitions | |
3 | +######################################## | |
4 | +%define name pm_logconv | |
5 | +%define cluster hb | |
6 | +%define version 1.2 | |
7 | +%define release 1 | |
8 | +%define prefix /usr | |
9 | +%define instdir pm_logconv | |
10 | +%define ORGARCH %{name}-%{version} | |
11 | +# | |
12 | +# | |
13 | +Summary: Pacemaker and Heartbeat log converter | |
14 | +Name: %{name}-%{cluster} | |
15 | +Version: %{version} | |
16 | +Release: %{release}%{?dist} | |
17 | +Group: Applications | |
18 | +Source: %{name}-%{version}.tar.gz | |
19 | +License: GPL | |
20 | +Vendor: NIPPON TELEGRAPH AND TELEPHONE CORPORATION | |
21 | +BuildRoot: %{_tmppath}/%{name}-%{version} | |
22 | +BuildRequires: make | |
23 | +BuildArch: noarch | |
24 | +Requires: python >= 2.4, python < 3.0 | |
25 | +Requires: pacemaker >= 1.0.9, pacemaker < 1.1 | |
26 | +Requires: heartbeat >= 3.0.3 | |
27 | + | |
28 | +######################################## | |
29 | +%description | |
30 | +Log message converter for Pacemaker and Heartbeat. | |
31 | +support version | |
32 | + Pacemaker : stable-1.0 (1.0.9 or more) | |
33 | + Heartbeat : 3.0.3 | |
34 | + | |
35 | +######################################## | |
36 | +%prep | |
37 | +######################################## | |
38 | +rm -rf $RPM_BUILD_ROOT | |
39 | + | |
40 | +######################################## | |
41 | +%setup -q | |
42 | +######################################## | |
43 | + | |
44 | +######################################## | |
45 | +%build | |
46 | +######################################## | |
47 | + | |
48 | +######################################## | |
49 | +%configure | |
50 | +######################################## | |
51 | + | |
52 | +######################################## | |
53 | +%pre | |
54 | +######################################## | |
55 | + | |
56 | +######################################## | |
57 | +%install | |
58 | +######################################## | |
59 | +make DESTDIR=$RPM_BUILD_ROOT install | |
60 | + | |
61 | +######################################## | |
62 | +%clean | |
63 | +######################################## | |
64 | +if | |
65 | + [ -n "${RPM_BUILD_ROOT}" -a "${RPM_BUILD_ROOT}" != "/" ] | |
66 | +then | |
67 | + rm -rf $RPM_BUILD_ROOT | |
68 | +fi | |
69 | +rm -rf $RPM_BUILD_DIR/%{ORGARCH} | |
70 | + | |
71 | +######################################## | |
72 | +%post | |
73 | +######################################## | |
74 | +true | |
75 | +######################################## | |
76 | +%preun | |
77 | +######################################## | |
78 | +true | |
79 | +######################################## | |
80 | +%postun | |
81 | +######################################## | |
82 | +true | |
83 | + | |
84 | +######################################## | |
85 | +%files | |
86 | +######################################## | |
87 | +%defattr(-,root,root) | |
88 | +%dir /etc | |
89 | +%config /etc/pm_logconv.conf | |
90 | +%dir %{prefix}/share/pacemaker/%{instdir} | |
91 | +%{prefix}/share/pacemaker/%{instdir}/pm_logconv.py | |
92 | +%ghost %{prefix}/share/pacemaker/%{instdir}/pm_logconv.pyc | |
93 | +%ghost %{prefix}/share/pacemaker/%{instdir}/pm_logconv.pyo |
@@ -1,93 +0,0 @@ | ||
1 | -######################################## | |
2 | -# Derived definitions | |
3 | -######################################## | |
4 | -%define name pm_logconv | |
5 | -%define cluster hb | |
6 | -%define version 1.1 | |
7 | -%define release 1.el@RHEL_VER@ | |
8 | -%define prefix /usr | |
9 | -%define instdir pm_logconv | |
10 | -%define ORGARCH %{name}-%{version} | |
11 | -# | |
12 | -# | |
13 | -Summary: Pacemaker and Heartbeat log converter | |
14 | -Name: %{name}-%{cluster} | |
15 | -Version: %{version} | |
16 | -Release: %{release} | |
17 | -Group: Applications | |
18 | -Source: %{name}-%{version}.tar.gz | |
19 | -License: GPL | |
20 | -Vendor: NIPPON TELEGRAPH AND TELEPHONE CORPORATION | |
21 | -BuildRoot: %{_tmppath}/%{name}-%{version} | |
22 | -BuildRequires: make | |
23 | -BuildArch: noarch | |
24 | -Requires: python >= 2.4, python < 3.0 | |
25 | -Requires: pacemaker >= 1.0.9 | |
26 | -Requires: heartbeat >= 3.0.3 | |
27 | - | |
28 | -######################################## | |
29 | -%description | |
30 | -Log message converter for Pacemaker and Heartbeat. | |
31 | -support version | |
32 | - Pacemaker : stable-1.0 (1.0.9 or more) | |
33 | - Heartbeat : 3.0.3 | |
34 | - | |
35 | -######################################## | |
36 | -%prep | |
37 | -######################################## | |
38 | -rm -rf $RPM_BUILD_ROOT | |
39 | - | |
40 | -######################################## | |
41 | -%setup -q | |
42 | -######################################## | |
43 | - | |
44 | -######################################## | |
45 | -%build | |
46 | -######################################## | |
47 | - | |
48 | -######################################## | |
49 | -%configure | |
50 | -######################################## | |
51 | - | |
52 | -######################################## | |
53 | -%pre | |
54 | -######################################## | |
55 | - | |
56 | -######################################## | |
57 | -%install | |
58 | -######################################## | |
59 | -make DESTDIR=$RPM_BUILD_ROOT install | |
60 | - | |
61 | -######################################## | |
62 | -%clean | |
63 | -######################################## | |
64 | -if | |
65 | - [ -n "${RPM_BUILD_ROOT}" -a "${RPM_BUILD_ROOT}" != "/" ] | |
66 | -then | |
67 | - rm -rf $RPM_BUILD_ROOT | |
68 | -fi | |
69 | -rm -rf $RPM_BUILD_DIR/%{ORGARCH} | |
70 | - | |
71 | -######################################## | |
72 | -%post | |
73 | -######################################## | |
74 | -true | |
75 | -######################################## | |
76 | -%preun | |
77 | -######################################## | |
78 | -true | |
79 | -######################################## | |
80 | -%postun | |
81 | -######################################## | |
82 | -true | |
83 | - | |
84 | -######################################## | |
85 | -%files | |
86 | -######################################## | |
87 | -%defattr(-,root,root) | |
88 | -%dir /etc | |
89 | -%config /etc/pm_logconv.conf | |
90 | -%dir %{prefix}/share/pacemaker/%{instdir} | |
91 | -%{prefix}/share/pacemaker/%{instdir}/pm_logconv.py | |
92 | -%ghost %{prefix}/share/pacemaker/%{instdir}/pm_logconv.pyc | |
93 | -%ghost %{prefix}/share/pacemaker/%{instdir}/pm_logconv.pyo |