pacemaker  1.1.24-3850484742
Scalable High-Availability cluster resource manager
unpack.c
/*
 * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <crm_internal.h>

#include <glib.h>

#include <crm/crm.h>
#include <crm/services.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>

#include <crm/common/util.h>
#include <crm/pengine/rules.h>
#include <crm/pengine/internal.h>
#include <unpack.h>
#include <pe_status_private.h>

CRM_TRACE_INIT_DATA(pe_status);

#define set_config_flag(data_set, option, flag) do {              \
        const char *tmp = pe_pref(data_set->config_hash, option); \
        if (tmp) {                                                \
            if (crm_is_true(tmp)) {                               \
                set_bit(data_set->flags, flag);                   \
            } else {                                              \
                clear_bit(data_set->flags, flag);                 \
            }                                                     \
        }                                                         \
    } while(0)
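
/* Illustrative usage (annotation added to this listing, not in the original
 * source): the macro mirrors a boolean cluster option into the working set's
 * flag bitmask, leaving the flag's compiled-in default untouched when the
 * option is absent, e.g.
 *
 *     set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
 */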

gboolean unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
                       enum action_fail_response *failed, pe_working_set_t * data_set);
static gboolean determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node);

// Bitmask for warnings we only want to print once
uint32_t pe_wo = 0;

static gboolean
is_dangling_container_remote_node(node_t *node)
{
    /* we are looking for a remote node that was supposed to be mapped to a
     * container resource, but all traces of that container have disappeared
     * from both the config and the status section. */
    if (is_remote_node(node) &&
        node->details->remote_rsc &&
        node->details->remote_rsc->container == NULL &&
        is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) {
        return TRUE;
    }

    return FALSE;
}

/*!
 * \brief Schedule a fence action for a node
 *
 * \param[in,out] data_set        Current working set of cluster
 * \param[in,out] node            Node to fence
 * \param[in]     reason          Text description of why fencing is needed
 * \param[in]     priority_delay  Whether to consider `priority-fencing-delay`
 */
void
pe_fence_node(pe_working_set_t * data_set, node_t * node,
              const char *reason, bool priority_delay)
{
    CRM_CHECK(node, return);

    /* A guest node is fenced by marking its container as failed */
    if (is_container_remote_node(node)) {
        resource_t *rsc = node->details->remote_rsc->container;

        if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
            if (!is_set(rsc->flags, pe_rsc_managed)) {
                crm_notice("Not fencing guest node %s "
                           "(otherwise would because %s): "
                           "its guest resource %s is unmanaged",
                           node->details->uname, reason, rsc->id);
            } else {
                crm_warn("Guest node %s will be fenced "
                         "(by recovering its guest resource %s): %s",
                         node->details->uname, rsc->id, reason);

                /* We don't mark the node as unclean because that would prevent the
                 * node from running resources. We want to allow it to run resources
                 * in this transition if the recovery succeeds.
                 */
                node->details->remote_requires_reset = TRUE;
                set_bit(rsc->flags, pe_rsc_failed);
            }
        }

    } else if (is_dangling_container_remote_node(node)) {
        crm_info("Cleaning up dangling connection for guest node %s: "
                 "fencing was already done because %s, "
                 "and guest resource no longer exists",
                 node->details->uname, reason);
        set_bit(node->details->remote_rsc->flags, pe_rsc_failed);

    } else if (is_baremetal_remote_node(node)) {
        resource_t *rsc = node->details->remote_rsc;

        if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
            crm_notice("Not fencing remote node %s "
                       "(otherwise would because %s): connection is unmanaged",
                       node->details->uname, reason);
        } else if (node->details->remote_requires_reset == FALSE) {
            node->details->remote_requires_reset = TRUE;
            crm_warn("Remote node %s %s: %s",
                     node->details->uname,
                     pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
                     reason);
        }
        node->details->unclean = TRUE;
        // No need to apply `priority-fencing-delay` for remote nodes
        pe_fence_op(node, NULL, TRUE, reason, FALSE, data_set);

    } else if (node->details->unclean) {
        crm_trace("Cluster node %s %s because %s",
                  node->details->uname,
                  pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
                  reason);

    } else {
        crm_warn("Cluster node %s %s: %s",
                 node->details->uname,
                 pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
                 reason);
        node->details->unclean = TRUE;
        pe_fence_op(node, NULL, TRUE, reason, priority_delay, data_set);
    }
}

// @TODO xpaths can't handle templates, rules, or id-refs

// nvpair with provides or requires set to unfencing
#define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR                \
    "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'"    \
    "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
    "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"

// unfencing in rsc_defaults or any resource
#define XPATH_ENABLE_UNFENCING \
    "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES  \
    "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR                        \
    "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
    "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
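
/* Example (illustrative, not part of the original file): the XPath above
 * matches a meta-attribute such as
 *
 *   <meta_attributes id="rsc-meta">
 *     <nvpair id="rsc-meta-requires" name="requires" value="unfencing"/>
 *   </meta_attributes>
 *
 * whether it appears on an individual resource or under rsc_defaults, which
 * is what lets unfencing be detected before resources are fully unpacked.
 */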

static void
set_if_xpath(unsigned long long flag, const char *xpath,
             pe_working_set_t *data_set)
{
    xmlXPathObjectPtr result = NULL;

    if (is_not_set(data_set->flags, flag)) {
        result = xpath_search(data_set->input, xpath);
        if (result && (numXpathResults(result) > 0)) {
            set_bit(data_set->flags, flag);
        }
        freeXpathObject(result);
    }
}

gboolean
unpack_config(xmlNode * config, pe_working_set_t * data_set)
{
    const char *value = NULL;
    GHashTable *config_hash = crm_str_table_new();

    data_set->config_hash = config_hash;

    unpack_instance_attributes(data_set->input, config, XML_CIB_TAG_PROPSET, NULL, config_hash,
                               CIB_OPTIONS_FIRST, FALSE, data_set->now);

    verify_pe_options(data_set->config_hash);

    set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
    if (is_not_set(data_set->flags, pe_flag_startup_probes)) {
        crm_info("Startup probes: disabled (dangerous)");
    }

    value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
    if (value && crm_is_true(value)) {
        crm_notice("Watchdog will be used via SBD if fencing is required");
        set_bit(data_set->flags, pe_flag_have_stonith_resource);
    }

    /* Set certain flags via xpath here, so they can be used before the relevant
     * configuration sections are unpacked.
     */
    set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);

    value = pe_pref(data_set->config_hash, "stonith-timeout");
    data_set->stonith_timeout = crm_get_msec(value);
    crm_debug("STONITH timeout: %d", data_set->stonith_timeout);

    set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
    crm_debug("STONITH of failed nodes is %s",
              is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");

    data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
    if (!strcmp(data_set->stonith_action, "poweroff")) {
        pe_warn_once(pe_wo_poweroff,
                     "Support for stonith-action of 'poweroff' is deprecated "
                     "and will be removed in a future release (use 'off' instead)");
        data_set->stonith_action = "off";
    }
    crm_trace("STONITH will %s nodes", data_set->stonith_action);

    set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
    crm_debug("Concurrent fencing is %s",
              is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled");

#ifdef ENABLE_PRIORITY_FENCING_DELAY
    value = pe_pref(data_set->config_hash,
                    XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
    if (value) {
        data_set->priority_fencing_delay = crm_get_msec(value) / 1000;
        crm_trace("Priority fencing delay is %ds", data_set->priority_fencing_delay);
    }
#endif

    set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
    crm_debug("Stop all active resources: %s",
              is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");

    set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
    if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
        crm_debug("Cluster is symmetric - resources can run anywhere by default");
    }

    value = pe_pref(data_set->config_hash, "default-resource-stickiness");
    if (value) {
        pe_warn_once(pe_wo_default_stickiness,
                     "Support for 'default-resource-stickiness' cluster property"
                     " is deprecated and will be removed in a future release"
                     " (use resource-stickiness in rsc_defaults instead)");
    }
    data_set->default_resource_stickiness = char2score(value);
    crm_debug("Default stickiness: %d", data_set->default_resource_stickiness);

    value = pe_pref(data_set->config_hash, "no-quorum-policy");

    if (safe_str_eq(value, "ignore")) {
        data_set->no_quorum_policy = no_quorum_ignore;

    } else if (safe_str_eq(value, "freeze")) {
        data_set->no_quorum_policy = no_quorum_freeze;

    } else if (safe_str_eq(value, "suicide")) {
        if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
            int do_panic = 0;

            crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC,
                                  &do_panic);
            if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) {
                data_set->no_quorum_policy = no_quorum_suicide;
            } else {
                crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
                data_set->no_quorum_policy = no_quorum_stop;
            }
        } else {
            crm_config_err("Resetting no-quorum-policy to 'stop': stonith is not configured");
            data_set->no_quorum_policy = no_quorum_stop;
        }

    } else {
        data_set->no_quorum_policy = no_quorum_stop;
    }

    switch (data_set->no_quorum_policy) {
        case no_quorum_freeze:
            crm_debug("On loss of CCM Quorum: Freeze resources");
            break;
        case no_quorum_stop:
            crm_debug("On loss of CCM Quorum: Stop ALL resources");
            break;
        case no_quorum_suicide:
            crm_notice("On loss of CCM Quorum: Fence all remaining nodes");
            break;
        case no_quorum_ignore:
            crm_notice("On loss of CCM Quorum: Ignore");
            break;
    }

    set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
    crm_trace("Orphan resources are %s",
              is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");

    set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
    crm_trace("Orphan resource actions are %s",
              is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");

    set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
    crm_trace("Stopped resources are removed from the status section: %s",
              is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");

    set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
    crm_trace("Maintenance mode: %s",
              is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");

    if (is_set(data_set->flags, pe_flag_maintenance_mode)) {
        clear_bit(data_set->flags, pe_flag_is_managed_default);
    } else if (pe_pref(data_set->config_hash, "is-managed-default")) {
        set_config_flag(data_set, "is-managed-default", pe_flag_is_managed_default);
        pe_warn_once(pe_wo_isman_default,
                     "Support for 'is-managed-default' cluster property"
                     " is deprecated and will be removed in a future release"
                     " (use is-managed in rsc_defaults instead)");
    }
    crm_trace("By default resources are %smanaged",
              is_set(data_set->flags, pe_flag_is_managed_default) ? "" : "not ");

    set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
    crm_trace("Start failures are %s",
              is_set(data_set->flags,
                     pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");

    if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
        set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
    }
    if (is_set(data_set->flags, pe_flag_startup_fencing)) {
        crm_trace("Unseen nodes will be fenced");
    } else {
        pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
    }

    node_score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
    node_score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
    node_score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));

    crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
              pe_pref(data_set->config_hash, "node-health-red"),
              pe_pref(data_set->config_hash, "node-health-yellow"),
              pe_pref(data_set->config_hash, "node-health-green"));

    data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
    crm_trace("Placement strategy: %s", data_set->placement_strategy);

    return TRUE;
}

static void
destroy_digest_cache(gpointer ptr)
{
    op_digest_cache_t *data = ptr;

    free_xml(data->params_all);
    free_xml(data->params_secure);
    free_xml(data->params_restart);

    free(data->digest_all_calc);
    free(data->digest_restart_calc);
    free(data->digest_secure_calc);

    free(data);
}

node_t *
pe_create_node(const char *id, const char *uname, const char *type,
               const char *score, pe_working_set_t * data_set)
{
    node_t *new_node = NULL;

    if (pe_find_node(data_set->nodes, uname) != NULL) {
        crm_config_warn("Detected multiple node entries with uname=%s"
                        " - this is rarely intended", uname);
    }

    new_node = calloc(1, sizeof(node_t));
    if (new_node == NULL) {
        return NULL;
    }

    new_node->weight = char2score(score);
    new_node->fixed = FALSE;
    new_node->details = calloc(1, sizeof(struct node_shared_s));

    if (new_node->details == NULL) {
        free(new_node);
        return NULL;
    }

    crm_trace("Creating node for entry %s/%s", uname, id);
    new_node->details->id = id;
    new_node->details->uname = uname;
    new_node->details->online = FALSE;
    new_node->details->shutdown = FALSE;
    new_node->details->rsc_discovery_enabled = TRUE;
    new_node->details->running_rsc = NULL;
    new_node->details->type = node_ping;

    if (safe_str_eq(type, "remote")) {
        new_node->details->type = node_remote;
        set_bit(data_set->flags, pe_flag_have_remote_nodes);
    } else if (type == NULL || safe_str_eq(type, "member")
               || safe_str_eq(type, NORMALNODE)) {
        new_node->details->type = node_member;
    }

    new_node->details->attrs = crm_str_table_new();

    if (is_remote_node(new_node)) {
        g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
                            strdup("remote"));
    } else {
        g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
                            strdup("cluster"));
    }

    new_node->details->utilization = crm_str_table_new();

    new_node->details->digest_cache =
        g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
                              destroy_digest_cache);

    data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
    return new_node;
}

bool
remote_id_conflict(const char *remote_name, pe_working_set_t *data)
{
    bool match = FALSE;
#if 1
    if (pe_find_resource(data->resources, remote_name)) {
        match = TRUE;
    }
#else
    if (data->name_check == NULL) {
        data->name_check = g_hash_table_new(crm_str_hash, g_str_equal);
        for (xml_rsc = __xml_first_child_element(parent); xml_rsc != NULL;
             xml_rsc = __xml_next_element(xml_rsc)) {

            const char *id = ID(xml_rsc);

            /* avoiding heap allocation here because we know the duration of
             * this hashtable allows us to */
            g_hash_table_insert(data->name_check, (char *) id, (char *) id);
        }
    }
    if (g_hash_table_lookup(data->name_check, remote_name)) {
        match = TRUE;
    }
#endif
    if (match) {
        crm_err("Invalid remote-node name, a resource called '%s' already exists.", remote_name);
        return TRUE;
    }

    return match;
}


static const char *
expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
{
    xmlNode *attr_set = NULL;
    xmlNode *attr = NULL;

    const char *container_id = ID(xml_obj);
    const char *remote_name = NULL;
    const char *remote_server = NULL;
    const char *remote_port = NULL;
    const char *connect_timeout = "60s";
    const char *remote_allow_migrate = NULL;
    const char *container_managed = NULL;

    for (attr_set = __xml_first_child_element(xml_obj); attr_set != NULL;
         attr_set = __xml_next_element(attr_set)) {
        if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
            continue;
        }

        for (attr = __xml_first_child_element(attr_set); attr != NULL;
             attr = __xml_next_element(attr)) {
            const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
            const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);

            if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) {
                remote_name = value;
            } else if (safe_str_eq(name, "remote-addr")) {
                remote_server = value;
            } else if (safe_str_eq(name, "remote-port")) {
                remote_port = value;
            } else if (safe_str_eq(name, "remote-connect-timeout")) {
                connect_timeout = value;
            } else if (safe_str_eq(name, "remote-allow-migrate")) {
                remote_allow_migrate = value;
            } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) {
                container_managed = value;
            }
        }
    }

    if (remote_name == NULL) {
        return NULL;
    }

    if (remote_id_conflict(remote_name, data)) {
        return NULL;
    }

    pe_create_remote_xml(parent, remote_name, container_id,
                         remote_allow_migrate, container_managed, "30s", "30s",
                         connect_timeout, remote_server, remote_port);
    return remote_name;
}
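
/* Example (illustrative, not from the original file): a VM primitive carrying
 * a remote-node meta-attribute, e.g.
 *
 *   <primitive id="vm1" class="ocf" provider="heartbeat" type="VirtualDomain">
 *     <meta_attributes id="vm1-meta">
 *       <nvpair id="vm1-remote" name="remote-node" value="guest1"/>
 *     </meta_attributes>
 *   </primitive>
 *
 * is expanded here into an implicit "guest1" remote connection resource, so
 * the rest of unpacking can treat guest1 like any other Pacemaker Remote node.
 */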

static void
handle_startup_fencing(pe_working_set_t *data_set, node_t *new_node)
{
    if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
        /* Ignore fencing for remote nodes that don't have a connection resource
         * associated with them. This happens when remote node entries get left
         * in the nodes section after the connection resource is removed.
         */
        return;
    }

    if (is_set(data_set->flags, pe_flag_startup_fencing)) {
        // All nodes are unclean until we've seen their status entry
        new_node->details->unclean = TRUE;

    } else {
        // Blind faith ...
        new_node->details->unclean = FALSE;
    }

    /* We need to be able to determine if a node's status section
     * exists or not separate from whether the node is unclean. */
    new_node->details->unseen = TRUE;
}

gboolean
unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
{
    xmlNode *xml_obj = NULL;
    node_t *new_node = NULL;
    const char *id = NULL;
    const char *uname = NULL;
    const char *type = NULL;
    const char *score = NULL;

    for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL;
         xml_obj = __xml_next_element(xml_obj)) {

        if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
            new_node = NULL;

            id = crm_element_value(xml_obj, XML_ATTR_ID);
            uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
            type = crm_element_value(xml_obj, XML_ATTR_TYPE);
            score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
            crm_trace("Processing node %s/%s", uname, id);

            if (id == NULL) {
                crm_config_err("Must specify id tag in <node>");
                continue;
            }
            new_node = pe_create_node(id, uname, type, score, data_set);

            if (new_node == NULL) {
                return FALSE;
            }

/*          if (data_set->have_quorum == FALSE */
/*              && data_set->no_quorum_policy == no_quorum_stop) { */
/*              /\* start shutting resources down *\/ */
/*              new_node->weight = -INFINITY; */
/*          } */

            handle_startup_fencing(data_set, new_node);

            add_node_attrs(xml_obj, new_node, FALSE, data_set);
            unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_UTILIZATION, NULL,
                                       new_node->details->utilization, NULL, FALSE, data_set->now);

            crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
        }
    }

    if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
        crm_info("Creating a fake local node");
        pe_create_node(data_set->localhost, data_set->localhost, NULL, 0,
                       data_set);
    }

    return TRUE;
}

static void
setup_container(resource_t * rsc, pe_working_set_t * data_set)
{
    const char *container_id = NULL;

    if (rsc->children) {
        GListPtr gIter = rsc->children;

        for (; gIter != NULL; gIter = gIter->next) {
            resource_t *child_rsc = (resource_t *) gIter->data;

            setup_container(child_rsc, data_set);
        }
        return;
    }

    container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
    if (container_id && safe_str_neq(container_id, rsc->id)) {
        resource_t *container = pe_find_resource(data_set->resources, container_id);

        if (container) {
            rsc->container = container;
            set_bit(container->flags, pe_rsc_is_container);
            container->fillers = g_list_append(container->fillers, rsc);
            pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
        } else {
            pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
        }
    }
}
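
/* Example (illustrative, not from the original file): a filler resource is
 * tied to its container via the "container" meta-attribute, e.g.
 *
 *   <nvpair id="rsc1-container" name="container" value="vm1"/>
 *
 * after which rsc->container points at vm1 and vm1's fillers list includes
 * rsc1.
 */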

gboolean
unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
{
    xmlNode *xml_obj = NULL;

    /* generate remote nodes from resource config before unpacking resources */
    for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
         xml_obj = __xml_next_element(xml_obj)) {

        const char *new_node_id = NULL;

        /* First check if this is a bare metal remote node. Bare metal remote
         * nodes are defined as a resource primitive only. */
        if (xml_contains_remote_node(xml_obj)) {
            new_node_id = ID(xml_obj);
            /* The "pe_find_node" check is here to make sure we don't iterate over
             * an expanded node that has already been added to the node list. */
            if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
                crm_trace("Found baremetal remote node %s in container resource %s", new_node_id, ID(xml_obj));
                pe_create_node(new_node_id, new_node_id, "remote", NULL,
                               data_set);
            }
            continue;
        }

        /* Now check for guest remote nodes.
         * Guest remote nodes are defined within a resource primitive.
         * Example 1: a VM resource might be configured as a remote node.
         * Example 2: a VM resource might be configured within a group to be a remote node.
         * Note: right now we only support guest remote nodes as a standalone primitive
         * or as a primitive within a group. Cloned primitives cannot be guest remote
         * nodes yet. */
        if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) {
            /* expands a metadata-defined remote resource into the xml config
             * as an actual rsc primitive to be unpacked later. */
            new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);

            if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
                crm_trace("Found guest remote node %s in container resource %s", new_node_id, ID(xml_obj));
                pe_create_node(new_node_id, new_node_id, "remote", NULL,
                               data_set);
            }
            continue;

        } else if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) {
            xmlNode *xml_obj2 = NULL;

            /* search through a group to see if any of the primitives contain a remote node. */
            for (xml_obj2 = __xml_first_child_element(xml_obj); xml_obj2 != NULL;
                 xml_obj2 = __xml_next_element(xml_obj2)) {

                new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);

                if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
                    crm_trace("Found guest remote node %s in container resource %s which is in group %s", new_node_id, ID(xml_obj2), ID(xml_obj));
                    pe_create_node(new_node_id, new_node_id, "remote", NULL,
                                   data_set);
                }
            }
        }
    }
    return TRUE;
}


/* Call this after all the nodes and resources have been
 * unpacked, but before the status section is read.
 *
 * A remote node's online status is reflected by the state
 * of the remote node's connection resource. We need to link
 * the remote node to this connection resource so we can have
 * easy access to the connection resource during the PE calculations.
 */
static void
link_rsc2remotenode(pe_working_set_t *data_set, resource_t *new_rsc)
{
    node_t *remote_node = NULL;

    if (new_rsc->is_remote_node == FALSE) {
        return;
    }

    if (is_set(data_set->flags, pe_flag_quick_location)) {
        /* remote_nodes and remote_resources are not linked in quick location calculations */
        return;
    }

    remote_node = pe_find_node(data_set->nodes, new_rsc->id);
    CRM_CHECK(remote_node != NULL, return;);

    pe_rsc_trace(new_rsc, "Linking remote connection resource %s to node %s",
                 new_rsc->id, remote_node->details->uname);
    remote_node->details->remote_rsc = new_rsc;

    /* If this is a baremetal remote node (no container resource associated
     * with it), then we need to handle startup fencing the same way
     * as cluster nodes. */
    if (new_rsc->container == NULL) {
        handle_startup_fencing(data_set, remote_node);
    } else {
        /* At this point we know whether the remote node is a guest or baremetal
         * remote node, so update the #kind attribute if a container is involved. */
        g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
                             strdup("container"));
    }
}

static void
destroy_tag(gpointer data)
{
    tag_t *tag = data;

    if (tag) {
        free(tag->id);
        g_list_free_full(tag->refs, free);
        free(tag);
    }
}

/*!
 * \internal
 * \brief Parse configuration XML for resource information
 *
 * \param[in]     xml_resources  Top of resource configuration XML
 * \param[in,out] data_set       Where to put resource information
 *
 * \return TRUE
 *
 * \note unpack_remote_nodes() MUST be called before this, so that the nodes
 *       can be used when common_unpack() calls resource_location()
 */
gboolean
unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
{
    xmlNode *xml_obj = NULL;
    GListPtr gIter = NULL;

    data_set->template_rsc_sets =
        g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str,
                              destroy_tag);

    for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
         xml_obj = __xml_next_element(xml_obj)) {

        resource_t *new_rsc = NULL;

        if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
            const char *template_id = ID(xml_obj);

            if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
                                                            template_id, NULL, NULL) == FALSE) {
                /* Record the template's ID for the knowledge of its existence anyway. */
                g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
            }
            continue;
        }

        crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
        if (common_unpack(xml_obj, &new_rsc, NULL, data_set) && (new_rsc != NULL)) {
            data_set->resources = g_list_append(data_set->resources, new_rsc);
            pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id);

        } else {
            crm_config_err("Failed unpacking %s %s",
                           crm_element_name(xml_obj), crm_element_value(xml_obj, XML_ATTR_ID));
            if (new_rsc != NULL && new_rsc->fns != NULL) {
                new_rsc->fns->free(new_rsc);
            }
        }
    }

    for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
        resource_t *rsc = (resource_t *) gIter->data;

        setup_container(rsc, data_set);
        link_rsc2remotenode(data_set, rsc);
    }

    data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
    if (is_set(data_set->flags, pe_flag_quick_location)) {
        /* Ignore */

    } else if (is_set(data_set->flags, pe_flag_stonith_enabled)
               && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {

        crm_config_err("Resource start-up disabled since no STONITH resources have been defined");
        crm_config_err("Either configure some or disable STONITH with the stonith-enabled option");
        crm_config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
    }

    return TRUE;
}

gboolean
unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
{
    xmlNode *xml_tag = NULL;

    data_set->tags =
        g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_tag);

    for (xml_tag = __xml_first_child_element(xml_tags); xml_tag != NULL;
         xml_tag = __xml_next_element(xml_tag)) {

        xmlNode *xml_obj_ref = NULL;
        const char *tag_id = ID(xml_tag);

        if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
            continue;
        }

        if (tag_id == NULL) {
            crm_config_err("Failed unpacking %s: %s should be specified",
                           crm_element_name(xml_tag), XML_ATTR_ID);
            continue;
        }

        for (xml_obj_ref = __xml_first_child_element(xml_tag); xml_obj_ref != NULL;
             xml_obj_ref = __xml_next_element(xml_obj_ref)) {

            const char *obj_ref = ID(xml_obj_ref);

            if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
                continue;
            }

            if (obj_ref == NULL) {
                crm_config_err("Failed unpacking %s for tag %s: %s should be specified",
                               crm_element_name(xml_obj_ref), tag_id, XML_ATTR_ID);
                continue;
            }

            if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
                return FALSE;
            }
        }
    }

    return TRUE;
}

/* The ticket state section:
 * "/cib/status/tickets/ticket_state" */
static gboolean
unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
{
    const char *ticket_id = NULL;
    const char *granted = NULL;
    const char *last_granted = NULL;
    const char *standby = NULL;
    xmlAttrPtr xIter = NULL;

    ticket_t *ticket = NULL;

    ticket_id = ID(xml_ticket);
    if (ticket_id == NULL || strlen(ticket_id) == 0) {
        return FALSE;
    }

    crm_trace("Processing ticket state for %s", ticket_id);

    ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
    if (ticket == NULL) {
        ticket = ticket_new(ticket_id, data_set);
        if (ticket == NULL) {
            return FALSE;
        }
    }

    for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
        const char *prop_name = (const char *)xIter->name;
        const char *prop_value = crm_element_value(xml_ticket, prop_name);

        if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
            continue;
        }
        g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
    }

    granted = g_hash_table_lookup(ticket->state, "granted");
    if (granted && crm_is_true(granted)) {
        ticket->granted = TRUE;
        crm_info("We have ticket '%s'", ticket->id);
    } else {
        ticket->granted = FALSE;
        crm_info("We do not have ticket '%s'", ticket->id);
    }

    last_granted = g_hash_table_lookup(ticket->state, "last-granted");
    if (last_granted) {
        ticket->last_granted = crm_parse_int(last_granted, 0);
    }

    standby = g_hash_table_lookup(ticket->state, "standby");
    if (standby && crm_is_true(standby)) {
        ticket->standby = TRUE;
        if (ticket->granted) {
            crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
        }
    } else {
        ticket->standby = FALSE;
    }

    crm_trace("Done with ticket state for %s", ticket_id);

    return TRUE;
}
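
/* Example (illustrative, not from the original file): a ticket_state entry
 * as unpacked above looks like
 *
 *   <ticket_state id="ticketA" granted="true" last-granted="1577836800"/>
 *
 * Every attribute except id is copied into ticket->state, with "granted",
 * "last-granted" and "standby" also mirrored into the ticket_t fields.
 */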

static gboolean
unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
{
    xmlNode *xml_obj = NULL;

    for (xml_obj = __xml_first_child_element(xml_tickets); xml_obj != NULL;
         xml_obj = __xml_next_element(xml_obj)) {

        if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
            continue;
        }
        unpack_ticket_state(xml_obj, data_set);
    }

    return TRUE;
}

/* @COMPAT DC < 1.1.7: Compatibility with the deprecated ticket state section:
 * "/cib/status/tickets/instance_attributes" */
static void
get_ticket_state_legacy(gpointer key, gpointer value, gpointer user_data)
{
    const char *long_key = key;
    char *state_key = NULL;

    const char *granted_prefix = "granted-ticket-";
    const char *last_granted_prefix = "last-granted-";
    static int granted_prefix_strlen = 0;
    static int last_granted_prefix_strlen = 0;

    const char *ticket_id = NULL;
    const char *is_granted = NULL;
    const char *last_granted = NULL;
    const char *sep = NULL;

    ticket_t *ticket = NULL;
    pe_working_set_t *data_set = user_data;

    if (granted_prefix_strlen == 0) {
        granted_prefix_strlen = strlen(granted_prefix);
    }

    if (last_granted_prefix_strlen == 0) {
        last_granted_prefix_strlen = strlen(last_granted_prefix);
    }

    if (strstr(long_key, granted_prefix) == long_key) {
        ticket_id = long_key + granted_prefix_strlen;
        if (strlen(ticket_id)) {
            state_key = strdup("granted");
            is_granted = value;
        }
    } else if (strstr(long_key, last_granted_prefix) == long_key) {
        ticket_id = long_key + last_granted_prefix_strlen;
        if (strlen(ticket_id)) {
            state_key = strdup("last-granted");
            last_granted = value;
        }
    } else if ((sep = strrchr(long_key, '-'))) {
        ticket_id = sep + 1;
        state_key = strndup(long_key, strlen(long_key) - strlen(sep));
    }

    if (ticket_id == NULL || strlen(ticket_id) == 0) {
        free(state_key);
        return;
    }

    if (state_key == NULL || strlen(state_key) == 0) {
        free(state_key);
        return;
    }

    ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
    if (ticket == NULL) {
        ticket = ticket_new(ticket_id, data_set);
        if (ticket == NULL) {
            free(state_key);
            return;
        }
    }

    g_hash_table_replace(ticket->state, state_key, strdup(value));

    if (is_granted) {
        if (crm_is_true(is_granted)) {
            ticket->granted = TRUE;
            crm_info("We have ticket '%s'", ticket->id);
        } else {
            ticket->granted = FALSE;
            crm_info("We do not have ticket '%s'", ticket->id);
        }

    } else if (last_granted) {
        ticket->last_granted = crm_parse_int(last_granted, 0);
    }
}
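
/* Example (illustrative, not from the original file): in the legacy format,
 * a hash table key such as "granted-ticket-ticketA" with value "true" maps to
 * ticket "ticketA" with state key "granted", while "last-granted-ticketA"
 * carries the grant timestamp; any other "<key>-<ticket>" pair is split at
 * the last '-' into a generic state key and ticket name.
 */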

static void
unpack_handle_remote_attrs(node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
{
    const char *resource_discovery_enabled = NULL;
    xmlNode *attrs = NULL;
    resource_t *rsc = NULL;

    if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
        return;
    }

    if ((this_node == NULL) || (is_remote_node(this_node) == FALSE)) {
        return;
    }
    crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);

    this_node->details->remote_maintenance =
        crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");

    rsc = this_node->details->remote_rsc;
    if (this_node->details->remote_requires_reset == FALSE) {
        this_node->details->unclean = FALSE;
        this_node->details->unseen = FALSE;
    }
    attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
    add_node_attrs(attrs, this_node, TRUE, data_set);

    if (pe__shutdown_requested(this_node)) {
        crm_info("Node %s is shutting down", this_node->details->uname);
        this_node->details->shutdown = TRUE;
        if (rsc) {
            rsc->next_role = RSC_ROLE_STOPPED;
        }
    }

    if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
        crm_info("Node %s is in standby-mode", this_node->details->uname);
        this_node->details->standby = TRUE;
    }

    if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
        (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
        crm_info("Node %s is in maintenance-mode", this_node->details->uname);
        this_node->details->maintenance = TRUE;
    }

    resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
    if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
        if (is_baremetal_remote_node(this_node) && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
            crm_warn("ignoring %s attribute on baremetal remote node %s, disabling resource discovery requires stonith to be enabled.",
                     XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
        } else {
            /* If we're here, this is either a baremetal node with fencing enabled,
             * or a guest node, where fencing enablement doesn't matter: guest nodes
             * are 'fenced' by recovering the container resource regardless of
             * whether fencing is enabled. */
            crm_info("Node %s has resource discovery disabled", this_node->details->uname);
            this_node->details->rsc_discovery_enabled = FALSE;
        }
    }
}

static bool
unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
{
    bool changed = false;
    xmlNode *lrm_rsc = NULL;

    for (xmlNode *state = __xml_first_child_element(status); state != NULL;
         state = __xml_next_element(state)) {

        const char *id = NULL;
        const char *uname = NULL;
        node_t *this_node = NULL;
        bool process = FALSE;

        if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
            continue;
        }

        id = crm_element_value(state, XML_ATTR_ID);
        uname = crm_element_value(state, XML_ATTR_UNAME);
        this_node = pe_find_node_any(data_set->nodes, id, uname);

        if (this_node == NULL) {
            crm_info("Node %s is unknown", id);
            continue;

        } else if (this_node->details->unpacked) {
            crm_info("Node %s is already processed", id);
            continue;

        } else if (is_remote_node(this_node) == FALSE && is_set(data_set->flags, pe_flag_stonith_enabled)) {
            // A redundant test, but preserves the order for regression tests
            process = TRUE;

        } else if (is_remote_node(this_node)) {
            bool check = FALSE;
            resource_t *rsc = this_node->details->remote_rsc;

            if (fence) {
                check = TRUE;

            } else if (rsc == NULL) {
                /* Not ready yet */

            } else if (is_container_remote_node(this_node)
                       && rsc->role == RSC_ROLE_STARTED
                       && rsc->container->role == RSC_ROLE_STARTED) {
                /* Both the connection and the underlying container
                 * need to be known 'up' before we voluntarily process
                 * resources inside it
                 */
                check = TRUE;
                crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);

            } else if (is_container_remote_node(this_node) == FALSE
                       && rsc->role == RSC_ROLE_STARTED) {
                check = TRUE;
                crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
            }

            if (check) {
                determine_remote_online_status(data_set, this_node);
                unpack_handle_remote_attrs(this_node, state, data_set);
                process = TRUE;
            }

        } else if (this_node->details->online) {
            process = TRUE;

        } else if (fence) {
            process = TRUE;
        }

        if (process) {
            crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
                      fence?"un":"", is_remote_node(this_node)?" remote":"",
                      this_node->details->uname);
            changed = TRUE;
            this_node->details->unpacked = TRUE;

            lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
            lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
            unpack_lrm_resources(this_node, lrm_rsc, data_set);
        }
    }
    return changed;
}

/* remove nodes that are down, stopping */
/* create +ve rsc_to_node constraints between resources and the nodes they are running on */
/* anything else? */
gboolean
unpack_status(xmlNode * status, pe_working_set_t * data_set)
{
    const char *id = NULL;
    const char *uname = NULL;

    xmlNode *state = NULL;
    node_t *this_node = NULL;

    crm_trace("Beginning unpack");

    if (data_set->tickets == NULL) {
        data_set->tickets =
            g_hash_table_new_full(crm_str_hash, g_str_equal, g_hash_destroy_str, destroy_ticket);
    }

    for (state = __xml_first_child_element(status); state != NULL;
         state = __xml_next_element(state)) {

        if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
            xmlNode *xml_tickets = state;
            GHashTable *state_hash = NULL;

            /* @COMPAT DC < 1.1.7: Compatibility with the deprecated ticket state section:
             * Unpack the attributes in the deprecated "/cib/status/tickets/instance_attributes" if it exists. */
            state_hash = crm_str_table_new();

            unpack_instance_attributes(data_set->input, xml_tickets, XML_TAG_ATTR_SETS, NULL,
                                       state_hash, NULL, TRUE, data_set->now);

            g_hash_table_foreach(state_hash, get_ticket_state_legacy, data_set);

            if (state_hash) {
                g_hash_table_destroy(state_hash);
            }

            /* Unpack the new "/cib/status/tickets/ticket_state"s */
            unpack_tickets_state(xml_tickets, data_set);
        }

        if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
            xmlNode *attrs = NULL;
            const char *resource_discovery_enabled = NULL;

            id = crm_element_value(state, XML_ATTR_ID);
            uname = crm_element_value(state, XML_ATTR_UNAME);
            this_node = pe_find_node_any(data_set->nodes, id, uname);

            if (uname == NULL) {
                /* error */
                continue;

            } else if (this_node == NULL) {
                crm_config_warn("Node %s in status section no longer exists", uname);
                continue;

            } else if (is_remote_node(this_node)) {
                /* online state for remote nodes is determined by the
                 * rsc state after all the unpacking is done. We do however
                 * need to mark whether or not the node has been fenced as this plays
                 * a role during unpacking cluster node resource state */
                this_node->details->remote_was_fenced =
                    crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0");
                continue;
            }

            crm_trace("Processing node id=%s, uname=%s", id, uname);

            /* Mark the node as provisionally clean
             * - at least we have seen it in the current cluster's lifetime
             */
            this_node->details->unclean = FALSE;
            this_node->details->unseen = FALSE;
            attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
            add_node_attrs(attrs, this_node, TRUE, data_set);

            if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
                crm_info("Node %s is in standby-mode", this_node->details->uname);
                this_node->details->standby = TRUE;
            }

            if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
                crm_info("Node %s is in maintenance-mode", this_node->details->uname);
                this_node->details->maintenance = TRUE;
            }

            resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
            if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
                crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
                         XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
            }

            crm_trace("determining node state");
            determine_online_status(state, this_node, data_set);

            if (is_not_set(data_set->flags, pe_flag_have_quorum)
                && this_node->details->online
                && (data_set->no_quorum_policy == no_quorum_suicide)) {
                /* Everything else should flow from this automatically
                 * At least until the PE becomes able to migrate off healthy resources
                 */
                pe_fence_node(data_set, this_node, "cluster does not have quorum", FALSE);
            }
        }
    }

    while (unpack_node_loop(status, FALSE, data_set)) {
        crm_trace("Start another loop");
    }

    // Now catch any nodes we didn't see
    unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);

    /* Now that we know where resources are, we can schedule stops of containers
     * with failed bundle connections
     */
    if (data_set->stop_needed != NULL) {
        for (GList *item = data_set->stop_needed; item; item = item->next) {
            pe_resource_t *container = item->data;
            pe_node_t *node = pe__current_node(container);

            if (node) {
                stop_action(container, node, FALSE);
            }
        }
        g_list_free(data_set->stop_needed);
        data_set->stop_needed = NULL;
    }

    for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
        node_t *this_node = gIter->data;

        if (this_node == NULL) {
            continue;
        } else if (is_remote_node(this_node) == FALSE) {
            continue;
        } else if (this_node->details->unpacked) {
            continue;
        }
        determine_remote_online_status(data_set, this_node);
    }

    return TRUE;
}

static gboolean
determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
                                   node_t * this_node)
{
    gboolean online = FALSE;
    const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
    const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
    const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
    const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);

    if (!crm_is_true(in_cluster)) {
        crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));

    } else if (safe_str_eq(is_peer, ONLINESTATUS)) {
        if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
            online = TRUE;
        } else {
            crm_debug("Node is not ready to run resources: %s", join);
        }

    } else if (this_node->details->expected_up == FALSE) {
        crm_trace("CRMd is down: in_cluster=%s", crm_str(in_cluster));
        crm_trace("\tis_peer=%s, join=%s, expected=%s",
                  crm_str(is_peer), crm_str(join), crm_str(exp_state));

    } else {
        /* mark it unclean */
        pe_fence_node(data_set, this_node, "peer is unexpectedly down", FALSE);
        crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
                 crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
    }
    return online;
}

static gboolean
determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
                                node_t * this_node)
{
    gboolean online = FALSE;
    gboolean do_terminate = FALSE;
    const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
    const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
    const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
    const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
    const char *terminate = pe_node_attribute_raw(this_node, "terminate");

/*
  - XML_NODE_IN_CLUSTER ::= true|false
  - XML_NODE_IS_PEER    ::= true|false|online|offline
  - XML_NODE_JOIN_STATE ::= member|down|pending|banned
  - XML_NODE_EXPECTED   ::= member|down
*/

    if (crm_is_true(terminate)) {
        do_terminate = TRUE;

    } else if (terminate != NULL && strlen(terminate) > 0) {
        /* could be a time() value */
        char t = terminate[0];

        if (t != '0' && isdigit(t)) {
            do_terminate = TRUE;
        }
    }

    crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
              this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
              crm_str(join), crm_str(exp_state), do_terminate);

    online = crm_is_true(in_cluster);
    if (safe_str_eq(is_peer, ONLINESTATUS)) {
        is_peer = XML_BOOLEAN_YES;
    }
    if (exp_state == NULL) {
        exp_state = CRMD_JOINSTATE_DOWN;
    }

    if (this_node->details->shutdown) {
        crm_debug("%s is shutting down", this_node->details->uname);

        /* Slightly different criteria since we can't shut down a dead peer */
        online = crm_is_true(is_peer);

    } else if (in_cluster == NULL) {
        pe_fence_node(data_set, this_node, "peer has not been seen by the cluster", FALSE);

    } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
        pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria", FALSE);

    } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {

        if (crm_is_true(in_cluster) || crm_is_true(is_peer)) {
            crm_info("- Node %s is not ready to run resources", this_node->details->uname);
            this_node->details->standby = TRUE;
            this_node->details->pending = TRUE;

        } else {
            crm_trace("%s is down or still coming up", this_node->details->uname);
        }

    } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
               && crm_is_true(in_cluster) == FALSE && crm_is_true(is_peer) == FALSE) {
        crm_info("Node %s was just shot", this_node->details->uname);
        online = FALSE;

    } else if (crm_is_true(in_cluster) == FALSE) {
        // Consider `priority-fencing-delay` for lost nodes
        pe_fence_node(data_set, this_node, "peer is no longer part of the cluster", TRUE);

    } else if (crm_is_true(is_peer) == FALSE) {
        pe_fence_node(data_set, this_node, "peer process is no longer available", FALSE);

        /* Everything is running at this point, now check join state */
    } else if (do_terminate) {
        pe_fence_node(data_set, this_node, "termination was requested", FALSE);

    } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
        crm_info("Node %s is active", this_node->details->uname);

    } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
               || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
        crm_info("Node %s is not ready to run resources", this_node->details->uname);
        this_node->details->standby = TRUE;
        this_node->details->pending = TRUE;

    } else {
        pe_fence_node(data_set, this_node, "peer was in an unknown state", FALSE);
        crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
                 this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
                 crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
    }

    return online;
}
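
/* Example (illustrative, not from the original file): the node_state fields
 * examined above come from status entries such as
 *
 *   <node_state id="1" uname="node1" in_ccm="true" crmd="online"
 *               join="member" expected="member"/>
 *
 * where in_ccm, crmd, join and expected correspond to XML_NODE_IN_CLUSTER,
 * XML_NODE_IS_PEER, XML_NODE_JOIN_STATE and XML_NODE_EXPECTED respectively.
 */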

static gboolean
determine_remote_online_status(pe_working_set_t * data_set, node_t * this_node)
{
    resource_t *rsc = this_node->details->remote_rsc;
    resource_t *container = NULL;
    pe_node_t *host = NULL;

    /* If there is a node state entry for a (former) Pacemaker Remote node
     * but no resource creating that node, the node's connection resource will
     * be NULL. Consider it an offline remote node in that case.
     */
    if (rsc == NULL) {
        this_node->details->online = FALSE;
        goto remote_online_done;
    }

    container = rsc->container;

    if (container && (g_list_length(rsc->running_on) == 1)) {
        host = rsc->running_on->data;
    }

    /* If the resource is currently started, mark it online. */
    if (rsc->role == RSC_ROLE_STARTED) {
        crm_trace("%s node %s presumed ONLINE because connection resource is started",
                  (container? "Guest" : "Remote"), this_node->details->id);
        this_node->details->online = TRUE;
    }

    /* consider this node shutting down if transitioning start->stop */
    if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
        crm_trace("%s node %s shutting down because connection resource is stopping",
                  (container? "Guest" : "Remote"), this_node->details->id);
        this_node->details->shutdown = TRUE;
    }

    /* Now check all the failure conditions. */
    if (container && is_set(container->flags, pe_rsc_failed)) {
        crm_trace("Guest node %s UNCLEAN because guest resource failed",
                  this_node->details->id);
        this_node->details->online = FALSE;
        this_node->details->remote_requires_reset = TRUE;

    } else if (is_set(rsc->flags, pe_rsc_failed)) {
        crm_trace("%s node %s OFFLINE because connection resource failed",
                  (container? "Guest" : "Remote"), this_node->details->id);
        this_node->details->online = FALSE;

    } else if (rsc->role == RSC_ROLE_STOPPED
               || (container && container->role == RSC_ROLE_STOPPED)) {

        crm_trace("%s node %s OFFLINE because its resource is stopped",
                  (container? "Guest" : "Remote"), this_node->details->id);
        this_node->details->online = FALSE;
        this_node->details->remote_requires_reset = FALSE;

    } else if (host && (host->details->online == FALSE)
               && host->details->unclean) {
        crm_trace("Guest node %s UNCLEAN because host is unclean",
                  this_node->details->id);
        this_node->details->online = FALSE;
        this_node->details->remote_requires_reset = TRUE;
    }

remote_online_done:
    crm_trace("Remote node %s online=%s",
              this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
    return this_node->details->online;
}

gboolean
determine_online_status(xmlNode * node_state, node_t * this_node, pe_working_set_t * data_set)
{
    gboolean online = FALSE;
    const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);

    if (this_node == NULL) {
        crm_config_err("No node to check");
        return online;
    }

    this_node->details->shutdown = FALSE;
    this_node->details->expected_up = FALSE;

    if (pe__shutdown_requested(this_node)) {
        this_node->details->shutdown = TRUE;

    } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
        this_node->details->expected_up = TRUE;
    }

    if (this_node->details->type == node_ping) {
        this_node->details->unclean = FALSE;
        online = FALSE;     /* As far as resource management is concerned,
                             * the node is safely offline.
                             * Anyone caught abusing this logic will be shot
                             */

    } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
        online = determine_online_status_no_fencing(data_set, node_state, this_node);

    } else {
        online = determine_online_status_fencing(data_set, node_state, this_node);
    }

    if (online) {
        this_node->details->online = TRUE;

    } else {
        /* remove node from contention */
        this_node->fixed = TRUE;
        this_node->weight = -INFINITY;
    }

    if (online && this_node->details->shutdown) {
        /* don't run resources here */
        this_node->fixed = TRUE;
        this_node->weight = -INFINITY;
    }

    if (this_node->details->type == node_ping) {
        crm_info("Node %s is not a pacemaker node", this_node->details->uname);

    } else if (this_node->details->unclean) {
        pe_proc_warn("Node %s is unclean", this_node->details->uname);

    } else if (this_node->details->online) {
        crm_info("Node %s is %s", this_node->details->uname,
                 this_node->details->shutdown ? "shutting down" :
                 this_node->details->pending ? "pending" :
                 this_node->details->standby ? "standby" :
                 this_node->details->maintenance ? "maintenance" : "online");

    } else {
        crm_trace("Node %s is offline", this_node->details->uname);
    }

    return online;
}
1612 
1621 const char *
1622 pe_base_name_end(const char *id)
1623 {
1624  if (!crm_strlen_zero(id)) {
1625  const char *end = id + strlen(id) - 1;
1626 
1627  for (const char *s = end; s > id; --s) {
1628  switch (*s) {
1629  case '0':
1630  case '1':
1631  case '2':
1632  case '3':
1633  case '4':
1634  case '5':
1635  case '6':
1636  case '7':
1637  case '8':
1638  case '9':
1639  break;
1640  case ':':
1641  return (s == end)? s : (s - 1);
1642  default:
1643  return end;
1644  }
1645  }
1646  return end;
1647  }
1648  return NULL;
1649 }
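/* Illustration (editor's addition): pe_base_name_end() returns the last
 * character of the clone base name, e.g.
 *   pe_base_name_end("vip:0")  -> the 'p' in "vip"
 *   pe_base_name_end("vip:10") -> the 'p' in "vip"
 *   pe_base_name_end("vip")    -> the trailing 'p'
 * A numeric tail without a ':' (e.g. "eth0") is part of the base name.
 */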
1650 
1661 char *
1662 clone_strip(const char *last_rsc_id)
1663 {
1664  const char *end = pe_base_name_end(last_rsc_id);
1665  char *basename = NULL;
1666 
1667  CRM_ASSERT(end);
1668  basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1669  CRM_ASSERT(basename);
1670  return basename;
1671 }
1672 
1683 char *
1684 clone_zero(const char *last_rsc_id)
1685 {
1686  const char *end = pe_base_name_end(last_rsc_id);
1687  size_t base_name_len = end - last_rsc_id + 1;
1688  char *zero = NULL;
1689 
1690  CRM_ASSERT(end);
1691  zero = calloc(base_name_len + 3, sizeof(char));
1692  CRM_ASSERT(zero);
1693  memcpy(zero, last_rsc_id, base_name_len);
1694  zero[base_name_len] = ':';
1695  zero[base_name_len + 1] = '0';
1696  return zero;
1697 }
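/* Illustration (editor's addition, using the two helpers above):
 *   clone_strip("galera:2") -> "galera"    clone_zero("galera:2") -> "galera:0"
 *   clone_strip("galera")   -> "galera"    clone_zero("galera")   -> "galera:0"
 * clone_zero() is what lets unpack_find_resource() match an operation
 * history entry against the single :0 instance that is created even
 * when clone-max=0.
 */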
1698 
1699 static resource_t *
1700 create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
1701 {
1702  resource_t *rsc = NULL;
1703  xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
1704 
1705  copy_in_properties(xml_rsc, rsc_entry);
1706  crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
1707  crm_log_xml_debug(xml_rsc, "Orphan resource");
1708 
1709  if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
1710  return NULL;
1711  }
1712 
1713  if (xml_contains_remote_node(xml_rsc)) {
1714  node_t *node;
1715 
1716  crm_debug("Detected orphaned remote node %s", rsc_id);
1717  node = pe_find_node(data_set->nodes, rsc_id);
1718  if (node == NULL) {
1719  node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
1720  }
1721  link_rsc2remotenode(data_set, rsc);
1722 
1723  if (node) {
1724  crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
1725  node->details->shutdown = TRUE;
1726  }
1727  }
1728 
1729  if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
1730  /* This orphaned rsc needs to be mapped to a container. */
1731  crm_trace("Detected orphaned container filler %s", rsc_id);
1732  set_bit(rsc->flags, pe_rsc_orphan_container_filler);
1733  }
1734  set_bit(rsc->flags, pe_rsc_orphan);
1735  data_set->resources = g_list_append(data_set->resources, rsc);
1736  return rsc;
1737 }
1738 
1743 static pe_resource_t *
1744 create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
1745  pe_node_t *node, pe_working_set_t *data_set)
1746 {
1747  pe_resource_t *top = pe__create_clone_child(parent, data_set);
1748 
1749  // find_rsc() because we might be a cloned group
1750  pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
1751 
1752  pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
1753  top->id, parent->id, rsc_id, node->details->uname);
1754  return orphan;
1755 }
1756 
1771 static resource_t *
1772 find_anonymous_clone(pe_working_set_t * data_set, node_t * node, resource_t * parent,
1773  const char *rsc_id)
1774 {
1775  GListPtr rIter = NULL;
1776  pe_resource_t *rsc = NULL;
1777  pe_resource_t *inactive_instance = NULL;
1778  gboolean skip_inactive = FALSE;
1779 
1780  CRM_ASSERT(parent != NULL);
1781  CRM_ASSERT(pe_rsc_is_clone(parent));
1782  CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
1783 
1784  // Check for active (or partially active, for cloned groups) instance
1785  pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
1786  for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
1787  GListPtr locations = NULL;
1788  resource_t *child = rIter->data;
1789 
1790  /* Check whether this instance is already known to be active or pending
1791  * anywhere, at this stage of unpacking. Because this function is called
1792  * for a resource before the resource's individual operation history
1793  * entries are unpacked, locations will generally not contain the
1794  * desired node.
1795  *
1796  * However, there are three exceptions:
1797  * (1) when child is a cloned group and we have already unpacked the
1798  * history of another member of the group on the same node;
1799  * (2) when we've already unpacked the history of another numbered
1800  * instance on the same node (which can happen if globally-unique
1801  * was flipped from true to false); and
1802  * (3) when we re-run calculations on the same data set as part of a
1803  * simulation.
1804  */
1805  child->fns->location(child, &locations, 2);
1806  if (locations) {
1807  /* We should never associate the same numbered anonymous clone
1808  * instance with multiple nodes, and clone instances can't migrate,
1809  * so there must be only one location, regardless of history.
1810  */
1811  CRM_LOG_ASSERT(locations->next == NULL);
1812 
1813  if (((pe_node_t *)locations->data)->details == node->details) {
1814  /* This child instance is active on the requested node, so check
1815  * for a corresponding configured resource. We use find_rsc()
1816  * instead of child because child may be a cloned group, and we
1817  * need the particular member corresponding to rsc_id.
1818  *
1819  * If the history entry is orphaned, rsc will be NULL.
1820  */
1821  rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
1822  if (rsc) {
1823  /* If there are multiple instance history entries for an
1824  * anonymous clone in a single node's history (which can
1825  * happen if globally-unique is switched from true to
1826  * false), we want to consider the instances beyond the
1827  * first as orphans, even if there are inactive instance
1828  * numbers available.
1829  */
1830  if (rsc->running_on) {
1831  crm_notice("Active (now-)anonymous clone %s has "
1832  "multiple (orphan) instance histories on %s",
1833  parent->id, node->details->uname);
1834  skip_inactive = TRUE;
1835  rsc = NULL;
1836  } else {
1837  pe_rsc_trace(parent, "Resource %s, active", rsc->id);
1838  }
1839  }
1840  }
1841  g_list_free(locations);
1842 
1843  } else {
1844  pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
1845  if (!skip_inactive && !inactive_instance
1846  && is_not_set(child->flags, pe_rsc_block)) {
1847  // Remember one inactive instance in case we don't find active
1848  inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
1849  pe_find_clone);
1850 
1851  /* ... but don't use it if it was already associated with a
1852  * pending action on another node
1853  */
1854  if (inactive_instance && inactive_instance->pending_node
1855  && (inactive_instance->pending_node->details != node->details)) {
1856  inactive_instance = NULL;
1857  }
1858  }
1859  }
1860  }
1861 
1862  if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
1863  pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
1864  rsc = inactive_instance;
1865  }
1866 
1867  if (rsc == NULL) {
1868  rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
1869  pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
1870  }
1871  return rsc;
1872 }
1873 
1874 static resource_t *
1875 unpack_find_resource(pe_working_set_t * data_set, node_t * node, const char *rsc_id,
1876  xmlNode * rsc_entry)
1877 {
1878  resource_t *rsc = NULL;
1879  resource_t *parent = NULL;
1880 
1881  crm_trace("looking for %s", rsc_id);
1882  rsc = pe_find_resource(data_set->resources, rsc_id);
1883 
1884  if (rsc == NULL) {
1885  /* If we didn't find the resource by its name in the operation history,
1886  * check it again as a clone instance. Even when clone-max=0, we create
1887  * a single :0 orphan to match against here.
1888  */
1889  char *clone0_id = clone_zero(rsc_id);
1890  resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);
1891 
1892  if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
1893  rsc = clone0;
1894  parent = uber_parent(clone0);
1895  crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
1896  } else {
1897  crm_trace("%s is not known as %s either (orphan)",
1898  rsc_id, clone0_id);
1899  }
1900  free(clone0_id);
1901 
1902  } else if (rsc->variant > pe_native) {
1903  crm_trace("Resource history for %s is orphaned because it is no longer primitive",
1904  rsc_id);
1905  return NULL;
1906 
1907  } else {
1908  parent = uber_parent(rsc);
1909  }
1910 
1911  if (pe_rsc_is_anon_clone(parent)) {
1912 
1913  if (pe_rsc_is_bundled(parent)) {
1914  rsc = find_container_child(parent->parent, node);
1915  } else {
1916  char *base = clone_strip(rsc_id);
1917 
1918  rsc = find_anonymous_clone(data_set, node, parent, base);
1919  free(base);
1920  CRM_ASSERT(rsc != NULL);
1921  }
1922  }
1923 
1924  if (rsc && safe_str_neq(rsc_id, rsc->id)
1925  && safe_str_neq(rsc_id, rsc->clone_name)) {
1926 
1927  free(rsc->clone_name);
1928  rsc->clone_name = strdup(rsc_id);
1929  pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
1930  rsc_id, node->details->uname, rsc->id,
1931  (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
1932  }
1933  return rsc;
1934 }
1935 
1936 static resource_t *
1937 process_orphan_resource(xmlNode * rsc_entry, node_t * node, pe_working_set_t * data_set)
1938 {
1939  resource_t *rsc = NULL;
1940  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
1941 
1942  crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
1943  rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
1944 
1945  if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
1946  clear_bit(rsc->flags, pe_rsc_managed);
1947 
1948  } else {
1949  CRM_CHECK(rsc != NULL, return NULL);
1950  pe_rsc_trace(rsc, "Added orphan %s", rsc->id);
1951  resource_location(rsc, NULL, -INFINITY, "__orphan_dont_run__", data_set);
1952  }
1953  return rsc;
1954 }
1955 
1956 static void
1957 process_rsc_state(resource_t * rsc, node_t * node,
1958  enum action_fail_response on_fail,
1959  xmlNode * migrate_op, pe_working_set_t * data_set)
1960 {
1961  node_t *tmpnode = NULL;
1962  char *reason = NULL;
1963 
1964  CRM_ASSERT(rsc);
1965  pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
1966  rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
1967 
1968  /* process current state */
1969  if (rsc->role != RSC_ROLE_UNKNOWN) {
1970  resource_t *iter = rsc;
1971 
1972  while (iter) {
1973  if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
1974  node_t *n = node_copy(node);
1975 
1976  pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
1977  n->details->uname);
1978  g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
1979  }
1980  if (is_set(iter->flags, pe_rsc_unique)) {
1981  break;
1982  }
1983  iter = iter->parent;
1984  }
1985  }
1986 
1987  /* If a managed resource is believed to be running, but node is down ... */
1988  if (rsc->role > RSC_ROLE_STOPPED
1989  && node->details->online == FALSE
1990  && node->details->maintenance == FALSE
1991  && is_set(rsc->flags, pe_rsc_managed)) {
1992 
1993  gboolean should_fence = FALSE;
1994 
1995  /* If this is a guest node, fence it (regardless of whether fencing is
1996  * enabled, because guest node fencing is done by recovery of the
1997  * container resource rather than by stonithd). Mark the resource
1998  * we're processing as failed. When the guest comes back up, its
1999  * operation history in the CIB will be cleared, freeing the affected
2000  * resource to run again once we are sure we know its state.
2001  */
2002  if (is_container_remote_node(node)) {
2003  set_bit(rsc->flags, pe_rsc_failed);
2004  should_fence = TRUE;
2005 
2006  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
2007  if (is_baremetal_remote_node(node) && node->details->remote_rsc
2008  && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
2009 
2010  /* Setting unseen = TRUE means that fencing of the remote node will
2011  * only occur if the connection resource is not going to start somewhere.
2012  * This allows connection resources on a failed cluster node to move to
2013  * another node without also requiring the baremetal remote node to be
2014  * fenced. */
2015  node->details->unseen = TRUE;
2016  reason = crm_strdup_printf("%s is active there (fencing will be"
2017  " revoked if remote connection can "
2018  "be re-established elsewhere)",
2019  rsc->id);
2020  }
2021  should_fence = TRUE;
2022  }
2023 
2024  if (should_fence) {
2025  if (reason == NULL) {
2026  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
2027  }
2028  pe_fence_node(data_set, node, reason, FALSE);
2029  }
2030  free(reason);
2031  }
2032 
2033  if (node->details->unclean) {
2034  /* No extra processing needed
2035  * Also allows resources to be started again after a node is shot
2036  */
2037  on_fail = action_fail_ignore;
2038  }
2039 
2040  switch (on_fail) {
2041  case action_fail_ignore:
2042  /* nothing to do */
2043  break;
2044 
2045  case action_fail_fence:
2046  /* treat it as if it is still running
2047  * but also mark the node as unclean
2048  */
2049  reason = crm_strdup_printf("%s failed there", rsc->id);
2050  pe_fence_node(data_set, node, reason, FALSE);
2051  free(reason);
2052  break;
2053 
2054  case action_fail_standby:
2055  node->details->standby = TRUE;
2056  node->details->standby_onfail = TRUE;
2057  break;
2058 
2059  case action_fail_block:
2060  /* is_managed == FALSE will prevent any
2061  * actions being sent for the resource
2062  */
2063  clear_bit(rsc->flags, pe_rsc_managed);
2064  set_bit(rsc->flags, pe_rsc_block);
2065  break;
2066 
2067  case action_fail_migrate:
2068  /* make sure it comes up somewhere else
2069  * or not at all
2070  */
2071  resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
2072  break;
2073 
2074  case action_fail_stop:
2075  rsc->next_role = RSC_ROLE_STOPPED;
2076  break;
2077 
2078  case action_fail_recover:
2079  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2080  set_bit(rsc->flags, pe_rsc_failed);
2081  stop_action(rsc, node, FALSE);
2082  }
2083  break;
2084 
2085  case action_fail_restart_container:
2086  set_bit(rsc->flags, pe_rsc_failed);
2087 
2088  if (rsc->container && pe_rsc_is_bundled(rsc)) {
2089  /* A bundle's remote connection can run on a different node than
2090  * the bundle's container. We don't necessarily know where the
2091  * container is running yet, so remember it and add a stop
2092  * action for it later.
2093  */
2094  data_set->stop_needed = g_list_prepend(data_set->stop_needed,
2095  rsc->container);
2096  } else if (rsc->container) {
2097  stop_action(rsc->container, node, FALSE);
2098  } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2099  stop_action(rsc, node, FALSE);
2100  }
2101  break;
2102 
2103  case action_fail_reset_remote:
2104  set_bit(rsc->flags, pe_rsc_failed);
2105  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
2106  tmpnode = NULL;
2107  if (rsc->is_remote_node) {
2108  tmpnode = pe_find_node(data_set->nodes, rsc->id);
2109  }
2110  if (tmpnode &&
2111  is_baremetal_remote_node(tmpnode) &&
2112  tmpnode->details->remote_was_fenced == 0) {
2113 
2114  /* The connection resource to the baremetal remote node failed in a
2115  * way that should result in fencing the remote node. */
2116  pe_fence_node(data_set, tmpnode,
2117  "remote connection is unrecoverable", FALSE);
2118  }
2119  }
2120 
2121  /* require the stop action regardless of whether fencing is occurring */
2122  if (rsc->role > RSC_ROLE_STOPPED) {
2123  stop_action(rsc, node, FALSE);
2124  }
2125 
2126  /* if reconnect delay is in use, prevent the connection from exiting the
2127  * "STOPPED" role until the failure is cleared by the delay timeout. */
2128  if (rsc->remote_reconnect_interval) {
2129  rsc->next_role = RSC_ROLE_STOPPED;
2130  }
2131  break;
2132  }
2133 
2134  /* Ensure a remote-node connection failure forces an unclean remote node
2135  * to be fenced. By setting unseen = FALSE, the remote-node failure will
2136  * result in a fencing operation regardless of whether we attempt to
2137  * reconnect to the remote node in this transition. */
2138  if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
2139  tmpnode = pe_find_node(data_set->nodes, rsc->id);
2140  if (tmpnode && tmpnode->details->unclean) {
2141  tmpnode->details->unseen = FALSE;
2142  }
2143  }
2144 
2145  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2146  if (is_set(rsc->flags, pe_rsc_orphan)) {
2147  if (is_set(rsc->flags, pe_rsc_managed)) {
2148  crm_config_warn("Detected active orphan %s running on %s",
2149  rsc->id, node->details->uname);
2150  } else {
2151  crm_config_warn("Cluster configured not to stop active orphans."
2152  " %s must be stopped manually on %s",
2153  rsc->id, node->details->uname);
2154  }
2155  }
2156 
2157  native_add_running(rsc, node, data_set);
2158  if (on_fail != action_fail_ignore) {
2159  set_bit(rsc->flags, pe_rsc_failed);
2160  }
2161 
2162  } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2163  /* Only do this for older status sections that included instance numbers
2164  * Otherwise stopped instances will appear as orphans
2165  */
2166  pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
2167  free(rsc->clone_name);
2168  rsc->clone_name = NULL;
2169 
2170  } else {
2171  char *key = stop_key(rsc);
2172  GListPtr possible_matches = find_actions(rsc->actions, key, node);
2173  GListPtr gIter = possible_matches;
2174 
2175  for (; gIter != NULL; gIter = gIter->next) {
2176  action_t *stop = (action_t *) gIter->data;
2177 
2178  stop->flags |= pe_action_optional;
2179  }
2180 
2181  g_list_free(possible_matches);
2182  free(key);
2183  }
2184 }
2185 
2186 /* create active recurring operations as optional */
2187 static void
2188 process_recurring(node_t * node, resource_t * rsc,
2189  int start_index, int stop_index,
2190  GListPtr sorted_op_list, pe_working_set_t * data_set)
2191 {
2192  int counter = -1;
2193  const char *task = NULL;
2194  const char *status = NULL;
2195  GListPtr gIter = sorted_op_list;
2196 
2197  CRM_ASSERT(rsc);
2198  pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
2199 
2200  for (; gIter != NULL; gIter = gIter->next) {
2201  xmlNode *rsc_op = (xmlNode *) gIter->data;
2202 
2203  int interval = 0;
2204  char *key = NULL;
2205  const char *id = ID(rsc_op);
2206  const char *interval_s = NULL;
2207 
2208  counter++;
2209 
2210  if (node->details->online == FALSE) {
2211  pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
2212  break;
2213 
2214  /* Need to check if there's a monitor for role="Stopped" */
2215  } else if (start_index < stop_index && counter <= stop_index) {
2216  pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
2217  continue;
2218 
2219  } else if (counter < start_index) {
2220  pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
2221  continue;
2222  }
2223 
2224  interval_s = crm_element_value(rsc_op, XML_LRM_ATTR_INTERVAL);
2225  interval = crm_parse_int(interval_s, "0");
2226  if (interval == 0) {
2227  pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
2228  continue;
2229  }
2230 
2231  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2232  if (safe_str_eq(status, "-1")) {
2233  pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
2234  continue;
2235  }
2236  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2237  /* create the action */
2238  key = generate_op_key(rsc->id, task, interval);
2239  pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
2240  custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
2241  }
2242 }
2243 
2244 void
2245 calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
2246 {
2247  int counter = -1;
2248  int implied_monitor_start = -1;
2249  int implied_master_start = -1;
2250  const char *task = NULL;
2251  const char *status = NULL;
2252  GListPtr gIter = sorted_op_list;
2253 
2254  *stop_index = -1;
2255  *start_index = -1;
2256 
2257  for (; gIter != NULL; gIter = gIter->next) {
2258  xmlNode *rsc_op = (xmlNode *) gIter->data;
2259 
2260  counter++;
2261 
2262  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2263  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2264 
2265  if (safe_str_eq(task, CRMD_ACTION_STOP)
2266  && safe_str_eq(status, "0")) {
2267  *stop_index = counter;
2268 
2269  } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2270  *start_index = counter;
2271 
2272  } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2273  const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
2274 
2275  if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
2276  implied_monitor_start = counter;
2277  }
2278  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
2279  implied_master_start = counter;
2280  }
2281  }
2282 
2283  if (*start_index == -1) {
2284  if (implied_master_start != -1) {
2285  *start_index = implied_master_start;
2286  } else if (implied_monitor_start != -1) {
2287  *start_index = implied_monitor_start;
2288  }
2289  }
2290 }
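/* Worked example (editor's addition): given a sorted history of
 *   [0] start, [1] monitor (rc=0), [2] stop (op-status=0), [3] start
 * the stop at index 2 sets *stop_index = 2 and the start at index 3
 * sets *start_index = 3, so process_recurring() skips entries 0-2 as
 * belonging to a previous activation of the resource. The implied
 * monitor/master indices are only used when no explicit start was found.
 */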
2291 
2292 static resource_t *
2293 unpack_lrm_rsc_state(node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
2294 {
2295  GListPtr gIter = NULL;
2296  int stop_index = -1;
2297  int start_index = -1;
2298  enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
2299 
2300  const char *task = NULL;
2301  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2302 
2303  resource_t *rsc = NULL;
2304  GListPtr op_list = NULL;
2305  GListPtr sorted_op_list = NULL;
2306 
2307  xmlNode *migrate_op = NULL;
2308  xmlNode *rsc_op = NULL;
2309  xmlNode *last_failure = NULL;
2310 
2311  enum action_fail_response on_fail = action_fail_ignore;
2312  enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
2313 
2314  crm_trace("[%s] Processing %s on %s",
2315  crm_element_name(rsc_entry), rsc_id, node->details->uname);
2316 
2317  /* extract operations */
2318  op_list = NULL;
2319  sorted_op_list = NULL;
2320 
2321  for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
2322  rsc_op = __xml_next_element(rsc_op)) {
2323  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
2324  op_list = g_list_prepend(op_list, rsc_op);
2325  }
2326  }
2327 
2328  if (op_list == NULL) {
2329  /* if there are no operations, there is nothing to do */
2330  return NULL;
2331  }
2332 
2333  /* find the resource */
2334  rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
2335  if (rsc == NULL) {
2336  rsc = process_orphan_resource(rsc_entry, node, data_set);
2337  }
2338  CRM_ASSERT(rsc != NULL);
2339 
2340  /* process operations */
2341  saved_role = rsc->role;
2342  on_fail = action_fail_ignore;
2343  rsc->role = RSC_ROLE_UNKNOWN;
2344  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2345 
2346  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2347  xmlNode *rsc_op = (xmlNode *) gIter->data;
2348 
2349  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2350  if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2351  migrate_op = rsc_op;
2352  }
2353 
2354  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
2355  }
2356 
2357  /* create active recurring operations as optional */
2358  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2359  process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
2360 
2361  /* no need to free the contents */
2362  g_list_free(sorted_op_list);
2363 
2364  process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
2365 
2366  if (get_target_role(rsc, &req_role)) {
2367  if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
2368  pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
2369  " with requested next role %s",
2370  rsc->id, role2text(rsc->next_role), role2text(req_role));
2371  rsc->next_role = req_role;
2372 
2373  } else if (req_role > rsc->next_role) {
2374  pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
2375  " with requested next role %s",
2376  rsc->id, role2text(rsc->next_role), role2text(req_role));
2377  }
2378  }
2379 
2380  if (saved_role > rsc->role) {
2381  rsc->role = saved_role;
2382  }
2383 
2384  return rsc;
2385 }
2386 
2387 static void
2388 handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2389 {
2390  xmlNode *rsc_entry = NULL;
2391  for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
2392  rsc_entry = __xml_next_element(rsc_entry)) {
2393 
2394  resource_t *rsc;
2395  resource_t *container;
2396  const char *rsc_id;
2397  const char *container_id;
2398 
2399  if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
2400  continue;
2401  }
2402 
2403  container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
2404  rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2405  if (container_id == NULL || rsc_id == NULL) {
2406  continue;
2407  }
2408 
2409  container = pe_find_resource(data_set->resources, container_id);
2410  if (container == NULL) {
2411  continue;
2412  }
2413 
2414  rsc = pe_find_resource(data_set->resources, rsc_id);
2415  if (rsc == NULL ||
2416  is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
2417  rsc->container != NULL) {
2418  continue;
2419  }
2420 
2421  pe_rsc_trace(rsc, "Mapped orphaned rsc %s's container to %s", rsc->id, container_id);
2422  rsc->container = container;
2423  container->fillers = g_list_append(container->fillers, rsc);
2424  }
2425 }
2426 
2427 gboolean
2428 unpack_lrm_resources(node_t * node, xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2429 {
2430  xmlNode *rsc_entry = NULL;
2431  gboolean found_orphaned_container_filler = FALSE;
2432 
2433  CRM_CHECK(node != NULL, return FALSE);
2434 
2435  crm_trace("Unpacking resources on %s", node->details->uname);
2436 
2437  for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
2438  rsc_entry = __xml_next_element(rsc_entry)) {
2439 
2440  if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
2441  resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
2442  if (!rsc) {
2443  continue;
2444  }
2445  if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
2446  found_orphaned_container_filler = TRUE;
2447  }
2448  }
2449  }
2450 
2451  /* now that all the resource state has been unpacked for this node
2452  * we have to go back and map any orphaned container fillers to their
2453  * container resource */
2454  if (found_orphaned_container_filler) {
2455  handle_orphaned_container_fillers(lrm_rsc_list, data_set);
2456  }
2457  return TRUE;
2458 }
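/* Editor's sketch (abbreviated, not part of the original source): the
 * lrm_rsc_list unpacked here is the per-node CIB status fragment,
 * roughly of the form
 *
 *   <lrm_resources>
 *     <lrm_resource id="rsc1" class="ocf" provider="heartbeat" type="Dummy">
 *       <lrm_rsc_op id="rsc1_last_0" operation="start" call-id="4"
 *                   rc-code="0" op-status="0" interval="0" .../>
 *     </lrm_resource>
 *   </lrm_resources>
 */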
2459 
2460 static void
2461 set_active(resource_t * rsc)
2462 {
2463  resource_t *top = uber_parent(rsc);
2464 
2465  if (top && top->variant == pe_master) {
2466  rsc->role = RSC_ROLE_SLAVE;
2467  } else {
2468  rsc->role = RSC_ROLE_STARTED;
2469  }
2470 }
2471 
2472 static void
2473 set_node_score(gpointer key, gpointer value, gpointer user_data)
2474 {
2475  node_t *node = value;
2476  int *score = user_data;
2477 
2478  node->weight = *score;
2479 }
2480 
2481 #define STATUS_PATH_MAX 1024
2482 static xmlNode *
2483 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2484  bool success_only, pe_working_set_t *data_set)
2485 {
2486  int offset = 0;
2487  char xpath[STATUS_PATH_MAX];
2488  xmlNode *xml = NULL;
2489 
2490  offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
2491  offset +=
2492  snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
2493  resource);
2494 
2495  /* Need to check against transition_magic too? */
2496  if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
2497  offset +=
2498  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2499  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
2500  source);
2501  } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
2502  offset +=
2503  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2504  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
2505  source);
2506  } else {
2507  offset +=
2508  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2509  "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
2510  }
2511 
2512  CRM_LOG_ASSERT(offset > 0);
2513  xml = get_xpath_object(xpath, data_set->input, LOG_DEBUG);
2514 
2515  if (xml && success_only) {
2516  int rc = PCMK_OCF_UNKNOWN_ERROR;
2517  int status = PCMK_LRM_OP_ERROR;
2518 
2519  crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
2520  crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
2521  if ((rc != PCMK_OCF_OK) || (status != PCMK_LRM_OP_DONE)) {
2522  return NULL;
2523  }
2524  }
2525  return xml;
2526 }
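/* Example (editor's addition): for resource "rsc1" and op "stop" on node
 * "node1" with no migration source, the xpath assembled above expands to
 *   //node_state[@uname='node1']//lrm_resource[@id='rsc1']
 *       /lrm_rsc_op[@operation='stop']
 * since XML_LRM_TAG_RESOURCE is "lrm_resource" and XML_LRM_TAG_RSC_OP is
 * "lrm_rsc_op".
 */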
2527 
2528 static int
2529 pe__call_id(xmlNode *op_xml)
2530 {
2531  int id = 0;
2532 
2533  if (op_xml) {
2534  crm_element_value_int(op_xml, XML_LRM_ATTR_CALLID, &id);
2535  }
2536  return id;
2537 }
2538 
2555 static bool
2556 stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2557  pe_working_set_t *data_set)
2558 {
2559  xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP,
2560  node->details->uname, NULL, TRUE, data_set);
2561 
2562  return (stop_op && (pe__call_id(stop_op) > pe__call_id(xml_op)));
2563 }
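/* Editor's note: call IDs increase monotonically within a node's history,
 * so "happened after" reduces to a call-id comparison; e.g. a successful
 * stop with call-id 12 supersedes a migrate_to recorded with call-id 9.
 * pe__call_id() returns 0 for a missing op, which can never be newer.
 */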
2564 
2565 static void
2566 unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2567  pe_working_set_t *data_set)
2568 {
2569  /* A successful migration sequence is:
2570  * migrate_to on source node
2571  * migrate_from on target node
2572  * stop on source node
2573  *
2574  * If a migrate_to is followed by a stop, the entire migration (successful
2575  * or failed) is complete, and we don't care what happened on the target.
2576  *
2577  * If no migrate_from has happened, the migration is considered to be
2578  * "partial". If the migrate_from failed, make sure the resource gets
2579  * stopped on both source and target (if up).
2580  *
2581  * If the migrate_to and migrate_from both succeeded (which also implies the
2582  * resource is no longer running on the source), but there is no stop, the
2583  * migration is considered to be "dangling". Schedule a stop on the source
2584  * in this case.
2585  */
2586  int from_rc = 0;
2587  int from_status = 0;
2588  pe_node_t *target_node = NULL;
2589  pe_node_t *source_node = NULL;
2590  xmlNode *migrate_from = NULL;
2591  const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2592  const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2593 
2594  if (stop_happened_after(rsc, node, xml_op, data_set)) {
2595  return;
2596  }
2597 
2598  // Clones are not allowed to migrate, so role can't be master
2599  rsc->role = RSC_ROLE_STARTED;
2600 
2601  target_node = pe_find_node(data_set->nodes, target);
2602  source_node = pe_find_node(data_set->nodes, source);
2603 
2604  // Check whether there was a migrate_from action on the target
2605  migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
2606  source, FALSE, data_set);
2607  if (migrate_from) {
2608  crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
2609  crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
2610  pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
2611  ID(migrate_from), target, from_status, from_rc);
2612  }
2613 
2614  if (migrate_from && from_rc == PCMK_OCF_OK
2615  && from_status == PCMK_LRM_OP_DONE) {
2616  /* The migrate_to and migrate_from both succeeded, so mark the migration
2617  * as "dangling". This will be used to schedule a stop action on the
2618  * source without affecting the target.
2619  */
2620  pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
2621  source);
2622  rsc->role = RSC_ROLE_STOPPED;
2623  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2624 
2625  } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
2626  if (target_node && target_node->details->online) {
2627  pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
2628  target_node->details->online);
2629  native_add_running(rsc, target_node, data_set);
2630  }
2631 
2632  } else { // Pending, or complete but erased
2633  if (target_node && target_node->details->online) {
2634  pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
2635  target_node->details->online);
2636 
2637  native_add_running(rsc, target_node, data_set);
2638  if (source_node && source_node->details->online) {
2639  /* This is a partial migration: the migrate_to completed
2640  * successfully on the source, but the migrate_from has not
2641  * completed. Remember the source and target; if the newly
2642  * chosen target remains the same when we schedule actions
2643  * later, we may continue with the migration.
2644  */
2645  rsc->partial_migration_target = target_node;
2646  rsc->partial_migration_source = source_node;
2647  }
2648  } else {
2649  /* Consider it failed here - forces a restart, prevents migration */
2650  set_bit(rsc->flags, pe_rsc_failed);
2651  clear_bit(rsc->flags, pe_rsc_allow_migrate);
2652  }
2653  }
2654 }
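/* Worked example (editor's addition): with a history of
 *   node1: migrate_to (call-id 5, OK)    node2: migrate_from (call-id 6, OK)
 * and no later stop on node1, the migration is "dangling" and a stop is
 * scheduled on node1 only. If the migrate_from were still pending, the
 * resource would instead be marked active on node2 and, with node1 also
 * online, remembered as a partial migration that may be continued.
 */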
2655 
2656 // Is there an action_name in node_name's rsc history newer than call_id?
2657 static bool
2658 newer_op(pe_resource_t *rsc, const char *action_name, const char *node_name,
2659  int call_id, pe_working_set_t *data_set)
2660 {
2661  xmlNode *action = find_lrm_op(rsc->id, action_name, node_name, NULL, TRUE,
2662  data_set);
2663 
2664  return pe__call_id(action) > call_id;
2665 }
2666 
2667 static void
2668 unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2669  pe_working_set_t *data_set)
2670 {
2671  int target_stop_id = 0;
2672  int target_migrate_from_id = 0;
2673  xmlNode *target_stop = NULL;
2674  xmlNode *target_migrate_from = NULL;
2675  const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2676  const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2677 
2678  /* If a migration failed, we have to assume the resource is active. Clones
2679  * are not allowed to migrate, so role can't be master.
2680  */
2681  rsc->role = RSC_ROLE_STARTED;
2682 
2683  // Check for stop on the target
2684  target_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, target, NULL,
2685  TRUE, data_set);
2686  target_stop_id = pe__call_id(target_stop);
2687 
2688  // Check for migrate_from on the target
2689  target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
2690  source, TRUE, data_set);
2691  target_migrate_from_id = pe__call_id(target_migrate_from);
2692 
2693  if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) {
2694  /* There was no stop on the target, or a stop that happened before a
2695  * migrate_from, so assume the resource is still active on the target
2696  * (if it is up).
2697  */
2698  node_t *target_node = pe_find_node(data_set->nodes, target);
2699 
2700  pe_rsc_trace(rsc, "stop (%d) + migrate_from (%d)",
2701  target_stop_id, target_migrate_from_id);
2702  if (target_node && target_node->details->online) {
2703  native_add_running(rsc, target_node, data_set);
2704  }
2705 
2706  } else if (target_migrate_from == NULL) {
2707  /* We know there was a stop on the target, but there may not have been a
2708  * migrate_from (the stop could have happened before migrate_from was
2709  * scheduled or attempted).
2710  *
2711  * That means this could be a "dangling" migration. But first, check
2712  * whether there is a newer successful stop, start, or migrate_from on
2713  * the source node -- it's possible the failed migration was followed by
2714  * a successful stop, full restart, or migration in the reverse
2715  * direction, in which case we don't want to force a stop.
2716  */
2717  int source_migrate_to_id = pe__call_id(xml_op);
2718 
2719  if (newer_op(rsc, CRMD_ACTION_MIGRATED, source, source_migrate_to_id,
2720  data_set)
2721  || newer_op(rsc, CRMD_ACTION_START, source, source_migrate_to_id,
2722  data_set)
2723  || newer_op(rsc, CRMD_ACTION_STOP, source, source_migrate_to_id,
2724  data_set)) {
2725  return;
2726  }
2727 
2728  // Mark node as having dangling migration so we can force a stop later
2729  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2730  }
2731 }
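/* Editor's note: the newer_op() checks above prevent forcing a stop when,
 * say, a failed migrate_to (call-id 7) was later followed by a successful
 * start on the source (call-id 9) -- the newer start shows the failed
 * migration no longer describes the resource's current state.
 */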
2732 
2733 static void
2734 unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node,
2735  xmlNode *xml_op, pe_working_set_t *data_set)
2736 {
2737  xmlNode *source_stop = NULL;
2738  xmlNode *source_migrate_to = NULL;
2739  const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2740  const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2741 
2742  /* If a migration failed, we have to assume the resource is active. Clones
2743  * are not allowed to migrate, so role can't be master.
2744  */
2745  rsc->role = RSC_ROLE_STARTED;
2746 
2747  // Check for a stop on the source
2748  source_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, source, NULL,
2749  TRUE, data_set);
2750 
2751  // Check for a migrate_to on the source
2752  source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE,
2753  source, target, TRUE, data_set);
2754 
2755  if ((source_stop == NULL)
2756  || (pe__call_id(source_stop) < pe__call_id(source_migrate_to))) {
2757  /* There was no stop on the source, or a stop that happened before
2758  * migrate_to, so assume the resource is still active on the source (if
2759  * it is up).
2760  */
2761  pe_node_t *source_node = pe_find_node(data_set->nodes, source);
2762 
2763  if (source_node && source_node->details->online) {
2764  native_add_running(rsc, source_node, data_set);
2765  }
2766  }
2767 }
2768 
2769 static void
2770 record_failed_op(xmlNode *op, node_t* node, resource_t *rsc, pe_working_set_t * data_set)
2771 {
2772  xmlNode *xIter = NULL;
2773  const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
2774 
2775  if (node->details->online == FALSE) {
2776  return;
2777  }
2778 
2779  for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
2780  const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
2781  const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
2782 
2783  if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) {
2784  crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
2785  return;
2786  }
2787  }
2788 
2789  crm_trace("Adding entry %s on %s", op_key, node->details->uname);
2790  crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
2791  crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
2792  add_node_copy(data_set->failed, op);
2793 }
2794 
2795 static const char *get_op_key(xmlNode *xml_op)
2796 {
2797  const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
2798  if(key == NULL) {
2799  key = ID(xml_op);
2800  }
2801  return key;
2802 }
2803 
2804 static void
2805 unpack_rsc_op_failure(resource_t * rsc, node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
2806  enum action_fail_response * on_fail, pe_working_set_t * data_set)
2807 {
2808  int interval = 0;
2809  bool is_probe = FALSE;
2810  action_t *action = NULL;
2811 
2812  const char *key = get_op_key(xml_op);
2813  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2814  const char *op_version = crm_element_value(xml_op, XML_ATTR_CRM_VERSION);
2815 
2816  CRM_ASSERT(rsc);
2817 
2818  *last_failure = xml_op;
2819 
2820  crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
2821  if(interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2822  is_probe = TRUE;
2823  pe_rsc_trace(rsc, "is a probe: %s", key);
2824  }
2825 
2826  if (rc != PCMK_OCF_NOT_INSTALLED || is_set(data_set->flags, pe_flag_symmetric_cluster)) {
2827  crm_warn("Processing failed %s of %s on %s: %s " CRM_XS " rc=%d",
2828  (is_probe? "probe" : task), rsc->id, node->details->uname,
2829  services_ocf_exitcode_str(rc), rc);
2830 
2831  if (is_probe && (rc != PCMK_OCF_OK)
2832  && (rc != PCMK_OCF_NOT_RUNNING)
2833  && (rc != PCMK_OCF_RUNNING_MASTER)) {
2834 
2835  /* A failed (not just unexpected) probe result could mean the user
2836  * didn't know resources will be probed even where they can't run.
2837  */
2838  crm_notice("If it is not possible for %s to run on %s, see "
2839  "the resource-discovery option for location constraints",
2840  rsc->id, node->details->uname);
2841  }
2842 
2843  record_failed_op(xml_op, node, rsc, data_set);
2844 
2845  } else {
2846  crm_trace("Processing failed op %s for %s on %s: %s (%d)",
2847  task, rsc->id, node->details->uname, services_ocf_exitcode_str(rc),
2848  rc);
2849  }
2850 
2851  action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
2852  if ((action->on_fail <= action_fail_fence && *on_fail < action->on_fail) ||
2853  (action->on_fail == action_fail_reset_remote && *on_fail <= action_fail_recover) ||
2854  (action->on_fail == action_fail_restart_container && *on_fail <= action_fail_recover) ||
2855  (*on_fail == action_fail_restart_container && action->on_fail >= action_fail_migrate)) {
2856  pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
2857  fail2text(action->on_fail), action->uuid, key);
2858  *on_fail = action->on_fail;
2859  }
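 /* Editor's note on the condition above: it lets a more disruptive
  * recovery policy override a milder one already recorded for this
  * resource: an earlier "recover" gives way to a later "fence";
  * "reset_remote" and "restart_container" override anything up to
  * "recover"; and a recorded "restart_container" is itself overridden
  * by policies of "migrate" severity or worse.
  */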
2860 
2861  if (safe_str_eq(task, CRMD_ACTION_STOP)) {
2862  resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
2863 
2864  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
2865  unpack_migrate_to_failure(rsc, node, xml_op, data_set);
2866 
2867  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2868  unpack_migrate_from_failure(rsc, node, xml_op, data_set);
2869 
2870  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
2871  rsc->role = RSC_ROLE_MASTER;
2872 
2873  } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
2874  if (action->on_fail == action_fail_block) {
2875  rsc->role = RSC_ROLE_MASTER;
2876  rsc->next_role = RSC_ROLE_STOPPED;
2877 
2878  } else if(rc == PCMK_OCF_NOT_RUNNING) {
2879  rsc->role = RSC_ROLE_STOPPED;
2880 
2881  } else {
2882  /*
2883  * Staying in master role would put the PE/TE into a loop. Setting
2884  * slave role is not dangerous because the resource will be stopped
2885  * as part of recovery, and any master promotion will be ordered
2886  * after that stop.
2887  */
2888  rsc->role = RSC_ROLE_SLAVE;
2889  }
2890 
2891  } else if (compare_version("2.0", op_version) > 0 && safe_str_eq(task, CRMD_ACTION_START)) {
2892  crm_warn("Compatibility handling for failed op %s on %s", key, node->details->uname);
2893  resource_location(rsc, node, -INFINITY, "__legacy_start__", data_set);
2894  }
2895 
2896  if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
2897  /* leave stopped */
2898  pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
2899  rsc->role = RSC_ROLE_STOPPED;
2900 
2901  } else if (rsc->role < RSC_ROLE_STARTED) {
2902  pe_rsc_trace(rsc, "Setting %s active", rsc->id);
2903  set_active(rsc);
2904  }
2905 
2906  pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
2907  rsc->id, role2text(rsc->role),
2908  node->details->unclean ? "true" : "false",
2909  fail2text(action->on_fail), role2text(action->fail_role));
2910 
2911  if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
2912  rsc->next_role = action->fail_role;
2913  }
2914 
2915  if (action->fail_role == RSC_ROLE_STOPPED) {
2916  int score = -INFINITY;
2917 
2918  resource_t *fail_rsc = rsc;
2919 
2920  if (fail_rsc->parent) {
2921  resource_t *parent = uber_parent(fail_rsc);
2922 
2923  if (pe_rsc_is_clone(parent)
2924  && is_not_set(parent->flags, pe_rsc_unique)) {
2925  /* for clone and master resources, if a child fails on an operation
2926  * with on-fail = stop, all the resources fail. Do this by preventing
2927  * the parent from coming up again. */
2928  fail_rsc = parent;
2929  }
2930  }
2931  crm_warn("Making sure %s doesn't come up again", fail_rsc->id);
2932  /* make sure it doesn't come up again */
2933  if (fail_rsc->allowed_nodes != NULL) {
2934  g_hash_table_destroy(fail_rsc->allowed_nodes);
2935  }
2936  fail_rsc->allowed_nodes = node_hash_from_list(data_set->nodes);
2937  g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
2938  }
2939 
2940  pe_free_action(action);
2941 }
2942 
2943 static int
2944 determine_op_status(
2945  resource_t *rsc, int rc, int target_rc, node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
2946 {
2947  int interval = 0;
2948  int result = PCMK_LRM_OP_DONE;
2949 
2950  const char *key = get_op_key(xml_op);
2951  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2952 
2953  bool is_probe = FALSE;
2954 
2955  CRM_ASSERT(rsc);
2956  crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
2957  if (interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2958  is_probe = TRUE;
2959  }
2960 
2961  if (target_rc >= 0 && target_rc != rc) {
2962  result = PCMK_LRM_OP_ERROR;
2963  pe_rsc_debug(rsc, "%s on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
2964  key, node->details->uname,
2965  services_ocf_exitcode_str(rc), rc,
2966  services_ocf_exitcode_str(target_rc), target_rc);
2967  }
2968 
2969  /* we could clean this up significantly except for old LRMs and CRMs that
2970  * didn't include target_rc and liked to remap status
2971  */
2972  switch (rc) {
2973  case PCMK_OCF_OK:
2974  if (is_probe && target_rc == 7) {
2975  result = PCMK_LRM_OP_DONE;
2976  pe_rsc_info(rsc, "Operation %s found resource %s active on %s",
2977  task, rsc->id, node->details->uname);
2978 
2979  /* legacy code for pre-0.6.5 operations */
2980  } else if (target_rc < 0 && interval > 0 && rsc->role == RSC_ROLE_MASTER) {
2981  /* catch status ops that return 0 instead of 8 while they
2982  * are supposed to be in master mode
2983  */
2984  result = PCMK_LRM_OP_ERROR;
2985  }
2986  break;
2987 
2988  case PCMK_OCF_NOT_RUNNING:
2989  if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
2990  result = PCMK_LRM_OP_DONE;
2991  rsc->role = RSC_ROLE_STOPPED;
2992 
2993  /* clear any previous failure actions */
2994  *on_fail = action_fail_ignore;
2995  rsc->next_role = RSC_ROLE_UNKNOWN;
2996 
2997  } else if (safe_str_neq(task, CRMD_ACTION_STOP)) {
2998  result = PCMK_LRM_OP_ERROR;
2999  }
3000  break;
3001 
3002  case PCMK_OCF_RUNNING_MASTER:
3003  if (is_probe) {
3004  result = PCMK_LRM_OP_DONE;
3005  pe_rsc_info(rsc, "Operation %s found resource %s active in master mode on %s",
3006  task, rsc->id, node->details->uname);
3007 
3008  } else if (target_rc == rc) {
3009  /* nothing to do */
3010 
3011  } else if (target_rc >= 0) {
3012  result = PCMK_LRM_OP_ERROR;
3013 
3014  /* legacy code for pre-0.6.5 operations */
3015  } else if (safe_str_neq(task, CRMD_ACTION_STATUS)
3016  || rsc->role != RSC_ROLE_MASTER) {
3017  result = PCMK_LRM_OP_ERROR;
3018  if (rsc->role != RSC_ROLE_MASTER) {
3019  crm_err("%s reported %s in master mode on %s",
3020  key, rsc->id, node->details->uname);
3021  }
3022  }
3023  rsc->role = RSC_ROLE_MASTER;
3024  break;
3025 
3026  case PCMK_OCF_DEGRADED_MASTER:
3027  case PCMK_OCF_FAILED_MASTER:
3028  rsc->role = RSC_ROLE_MASTER;
3029  result = PCMK_LRM_OP_ERROR;
3030  break;
3031 
3032  case PCMK_OCF_NOT_CONFIGURED:
3033  result = PCMK_LRM_OP_ERROR_FATAL;
3034  break;
3035 
3036  case PCMK_OCF_INVALID_PARAM:
3037  case PCMK_OCF_INSUFFICIENT_PRIV:
3038  case PCMK_OCF_NOT_INSTALLED:
3039  case PCMK_OCF_UNIMPLEMENT_FEATURE:
3040  if (rc == PCMK_OCF_UNIMPLEMENT_FEATURE && interval > 0) {
3041  result = PCMK_LRM_OP_NOTSUPPORTED;
3042  break;
3043 
3044  } else if (pe_can_fence(data_set, node) == FALSE
3045  && safe_str_eq(task, CRMD_ACTION_STOP)) {
3046  /* If a stop fails and we can't fence, there's nothing else we can do */
3047  pe_proc_err("No further recovery can be attempted for %s: %s action failed with '%s' (%d)",
3048  rsc->id, task, services_ocf_exitcode_str(rc), rc);
3050  set_bit(rsc->flags, pe_rsc_block);
3051  }
3052  result = PCMK_LRM_OP_ERROR_HARD;
3053  break;
3054 
3055  default:
3056  if (result == PCMK_LRM_OP_DONE) {
3057  crm_info("Treating %s (rc=%d) on %s as an ERROR",
3058  key, rc, node->details->uname);
3059  result = PCMK_LRM_OP_ERROR;
3060  }
3061  }
3062 
3063  return result;
3064 }
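/* Editor's summary (illustrative, not exhaustive): a probe (interval-0
 * monitor) returning 7 when 7 was expected maps to PCMK_LRM_OP_DONE with
 * role Stopped; rc 0 on a probe that expected 7 is reported as "found
 * active"; configuration-class failures (not configured, invalid param,
 * not installed, ...) map to fatal/hard errors rather than the plain
 * PCMK_LRM_OP_ERROR used for ordinary failures.
 */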
3065 
3066 static bool check_operation_expiry(resource_t *rsc, node_t *node, int rc, xmlNode *xml_op, pe_working_set_t * data_set)
3067 {
3068  bool expired = FALSE;
3069  time_t last_failure = 0;
3070  int interval = 0;
3071  int failure_timeout = rsc->failure_timeout;
3072  const char *key = get_op_key(xml_op);
3073  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
3074  const char *clear_reason = NULL;
3075 
3076  /* clearing recurring monitor operation failures automatically
3077  * needs to be carefully considered */
3078  if (safe_str_eq(crm_element_value(xml_op, XML_LRM_ATTR_TASK), "monitor") &&
3079  safe_str_neq(crm_element_value(xml_op, XML_LRM_ATTR_INTERVAL), "0")) {
3080 
3081  /* TODO: in the future we should consider not clearing recurring monitor
3082  * op failures unless the last action for the resource was a "stop" action.
3083  * Otherwise it is possible that clearing the monitor failure will leave
3084  * the resource in an indeterminate state.
3085  *
3086  * For now we handle this potentially indeterminate condition for remote
3087  * node connection resources by not clearing a recurring monitor op failure
3088  * until after the node has been fenced. */
3089 
3090  if (is_set(data_set->flags, pe_flag_stonith_enabled) &&
3091  (rsc->remote_reconnect_interval)) {
3092 
3093  node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
3094  if (remote_node && remote_node->details->remote_was_fenced == 0) {
3095  if (strstr(ID(xml_op), "last_failure")) {
3096  crm_info("Waiting to clear monitor failure for remote node %s until fencing has occurred", rsc->id);
3097  }
3098  /* disabling failure timeout for this operation because we believe
3099  * fencing of the remote node should occur first. */
3100  failure_timeout = 0;
3101  }
3102  }
3103  }
3104 
3105  if (failure_timeout > 0) {
3106  int last_run = 0;
3107 
3108  if (crm_element_value_int(xml_op, XML_RSC_OP_LAST_CHANGE, &last_run) == 0) {
3109  time_t now = get_effective_time(data_set);
3110 
3111  if (now > (last_run + failure_timeout)) {
3112  expired = TRUE;
3113  }
3114  }
3115  }
3116 
3117  if (expired) {
3118  if (pe_get_failcount(node, rsc, &last_failure, pe_fc_default, xml_op,
3119  data_set)) {
3120 
3121  // There is a fail count ignoring timeout
3122 
3123  if (pe_get_failcount(node, rsc, &last_failure, pe_fc_effective,
3124  xml_op, data_set) == 0) {
3125  // There is no fail count considering timeout
3126  clear_reason = "it expired";
3127 
3128  } else {
3129  expired = FALSE;
3130  }
3131 
3132  } else if (rsc->remote_reconnect_interval
3133  && strstr(ID(xml_op), "last_failure")) {
3134  // Always clear last failure when reconnect interval is set
3135  clear_reason = "reconnect interval is set";
3136  }
3137 
3138  } else if (strstr(ID(xml_op), "last_failure") &&
3139  ((strcmp(task, "start") == 0) || (strcmp(task, "monitor") == 0))) {
3140 
3141  if (container_fix_remote_addr(rsc)) {
3142  /* We haven't allocated resources yet, so we can't reliably
3143  * substitute addr parameters for the REMOTE_CONTAINER_HACK.
3144  * When that's needed, defer the check until later.
3145  */
3146  pe__add_param_check(xml_op, rsc, node, pe_check_last_failure,
3147  data_set);
3148 
3149  } else {
3150  op_digest_cache_t *digest_data = NULL;
3151 
3152  digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
3153  switch (digest_data->rc) {
3154  case RSC_DIGEST_UNKNOWN:
3155  crm_trace("Resource %s history entry %s on %s has no digest to compare",
3156  rsc->id, key, node->details->id);
3157  break;
3158  case RSC_DIGEST_MATCH:
3159  break;
3160  default:
3161  clear_reason = "resource parameters have changed";
3162  break;
3163  }
3164  }
3165  }
3166 
3167  if (clear_reason != NULL) {
3168  node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
3169  pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason,
3170  data_set);
3171 
3172  if (is_set(data_set->flags, pe_flag_stonith_enabled)
3173  && rsc->remote_reconnect_interval
3174  && remote_node
3175  && remote_node->details->unclean) {
3176 
3177  pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL, FALSE, data_set);
3178  crm_notice("Waiting for %s to complete before clearing %s failure for remote node %s", fence?fence->uuid:"nil", task, rsc->id);
3179 
3180  order_actions(fence, clear_op, pe_order_implies_then);
3181  }
3182  }
3183 
3184  crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
3185  if(expired && interval == 0 && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3186  switch(rc) {
3187  case PCMK_OCF_OK:
3188  case PCMK_OCF_NOT_RUNNING:
3189  case PCMK_OCF_RUNNING_MASTER:
3190  case PCMK_OCF_DEGRADED:
3191  case PCMK_OCF_DEGRADED_MASTER:
3192  /* Don't expire probes that return these values */
3193  expired = FALSE;
3194  break;
3195  }
3196  }
3197 
3198  return expired;
3199 }
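/* Worked example (editor's addition): with failure-timeout=60s, a failed
 * monitor whose last-rc-change was 120s ago is considered expired; if the
 * effective fail count is already zero, a clear-failcount op is scheduled
 * ("it expired"). Probes reporting one of the benign codes listed above
 * are never treated as expired, so their result still counts.
 */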
3200 
3201 int get_target_rc(xmlNode *xml_op)
3202 {
3203  int dummy = 0;
3204  int target_rc = 0;
3205  char *dummy_string = NULL;
3206  const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
3207  if (key == NULL) {
3208  return -1;
3209  }
3210 
3211  decode_transition_key(key, &dummy_string, &dummy, &dummy, &target_rc);
3212  free(dummy_string);
3213 
3214  return target_rc;
3215 }
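/* Example (editor's addition, assuming the usual transition key layout
 * "<action>:<transition>:<target-rc>:<uuid>"): for a key of
 * "11:3:7:4a9f2b3c-..." decode_transition_key() yields target_rc = 7,
 * i.e. the operation was expected to find the resource stopped.
 */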
3216 
3217 static enum action_fail_response
3218 get_action_on_fail(resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
3219 {
3220  int result = action_fail_recover;
3221  action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
3222 
3223  result = action->on_fail;
3224  pe_free_action(action);
3225 
3226  return result;
3227 }
3228 
3229 static void
3230 update_resource_state(resource_t * rsc, node_t * node, xmlNode * xml_op, const char * task, int rc,
3231  xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
3232 {
3233  gboolean clear_past_failure = FALSE;
3234 
3235  CRM_ASSERT(rsc);
3236  CRM_ASSERT(xml_op);
3237 
3238  if (rc == PCMK_OCF_NOT_RUNNING) {
3239  clear_past_failure = TRUE;
3240 
3241  } else if (rc == PCMK_OCF_NOT_INSTALLED) {
3242  rsc->role = RSC_ROLE_STOPPED;
3243 
3244  } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
3245  if (last_failure) {
3246  const char *op_key = get_op_key(xml_op);
3247  const char *last_failure_key = get_op_key(last_failure);
3248 
3249  if (safe_str_eq(op_key, last_failure_key)) {
3250  clear_past_failure = TRUE;
3251  }
3252  }
3253 
3254  if (rsc->role < RSC_ROLE_STARTED) {
3255  set_active(rsc);
3256  }
3257 
3258  } else if (safe_str_eq(task, CRMD_ACTION_START)) {
3259  rsc->role = RSC_ROLE_STARTED;
3260  clear_past_failure = TRUE;
3261 
3262  } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
3263  rsc->role = RSC_ROLE_STOPPED;
3264  clear_past_failure = TRUE;
3265 
3266  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
3267  rsc->role = RSC_ROLE_MASTER;
3268  clear_past_failure = TRUE;
3269 
3270  } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
3271  /* Demote from Master does not clear an error */
3272  rsc->role = RSC_ROLE_SLAVE;
3273 
3274  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
3275  rsc->role = RSC_ROLE_STARTED;
3276  clear_past_failure = TRUE;
3277 
3278  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
3279  unpack_migrate_to_success(rsc, node, xml_op, data_set);
3280 
3281  } else if (rsc->role < RSC_ROLE_STARTED) {
3282  pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
3283  set_active(rsc);
3284  }
3285 
3286  /* clear any previous failure actions */
3287  if (clear_past_failure) {
3288  switch (*on_fail) {
3289  case action_fail_stop:
3290  case action_fail_fence:
3291  case action_fail_migrate:
3292  case action_fail_standby:
3293  pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
3294  rsc->id, fail2text(*on_fail));
3295  break;
3296 
3297  case action_fail_block:
3298  case action_fail_ignore:
3299  case action_fail_recover:
3300  case action_fail_restart_container:
3301  *on_fail = action_fail_ignore;
3302  rsc->next_role = RSC_ROLE_UNKNOWN;
3303  break;
3304  case action_fail_reset_remote:
3305  if (rsc->remote_reconnect_interval == 0) {
3306  /* When reconnect delay is not in use, the connection is allowed to
3307  * start again after the remote node is fenced and completely stopped.
3308  * Otherwise, with reconnect delay, we wait for the failure to be
3309  * cleared entirely before a reconnect can be attempted. */
3310  *on_fail = action_fail_ignore;
3311  rsc->next_role = RSC_ROLE_UNKNOWN;
3312  }
3313  break;
3314  }
3315  }
3316 }
3317 
3318 
3319 gboolean
3320 unpack_rsc_op(resource_t * rsc, node_t * node, xmlNode * xml_op, xmlNode ** last_failure,
3321  enum action_fail_response * on_fail, pe_working_set_t * data_set)
3322 {
3323  int task_id = 0;
3324 
3325  const char *key = NULL;
3326  const char *task = NULL;
3327  const char *task_key = NULL;
3328 
3329  int rc = 0;
3330  int status = PCMK_LRM_OP_PENDING-1;
3331  int target_rc = get_target_rc(xml_op);
3332  int interval = 0;
3333 
3334  gboolean expired = FALSE;
3335  resource_t *parent = rsc;
3336  enum action_fail_response failure_strategy = action_fail_recover;
3337 
3338  CRM_CHECK(rsc != NULL, return FALSE);
3339  CRM_CHECK(node != NULL, return FALSE);
3340  CRM_CHECK(xml_op != NULL, return FALSE);
3341 
3342  task_key = get_op_key(xml_op);
3343 
3344  task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
3345  key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
3346 
3347  crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
3348  crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
3349  crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
3350  crm_element_value_int(xml_op, XML_LRM_ATTR_INTERVAL, &interval);
3351 
3352  CRM_CHECK(task != NULL, return FALSE);
3353  CRM_CHECK(status <= PCMK_LRM_OP_NOT_INSTALLED, return FALSE);
3354  CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return FALSE);
3355 
3356  if (safe_str_eq(task, CRMD_ACTION_NOTIFY) ||
3357  safe_str_eq(task, CRMD_ACTION_METADATA)) {
3358  /* safe to ignore these */
3359  return TRUE;
3360  }
3361 
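 /* For anonymous clone instances, any constraints generated below (such
  * as banning a node after a hard error) must apply to the clone as a
  * whole, so resolve the topmost parent up front. */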
3362  if (is_not_set(rsc->flags, pe_rsc_unique)) {
3363  parent = uber_parent(rsc);
3364  }
3365 
3366  pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
3367  task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
3368 
3369  if (node->details->unclean) {
3370  pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
3371  " Further action depends on the value of the stop's on-fail attribute",
3372  node->details->uname, rsc->id);
3373  }
3374 
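 /* Check whether this result has outlived its failure-timeout and can
  * be treated as expired; "not installed" results are never expired. */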
3375  if(status != PCMK_LRM_OP_NOT_INSTALLED) {
3376  expired = check_operation_expiry(rsc, node, rc, xml_op, data_set);
3377  }
3378 
3379  /* Degraded results are informational only; re-map them to their error-free equivalents */
3380  if (rc == PCMK_OCF_DEGRADED && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3381  rc = PCMK_OCF_OK;
3382 
3383  /* Add them to the failed list to highlight them for the user */
3384  if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
3385  crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED, PCMK_OCF_OK);
3386  record_failed_op(xml_op, node, rsc, data_set);
3387  }
3388 
3389  } else if (rc == PCMK_OCF_DEGRADED_MASTER && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3390  rc = PCMK_OCF_RUNNING_MASTER;
3391 
3392  /* Add them to the failed list to highlight them for the user */
3393  if ((node->details->shutdown == FALSE) || (node->details->online == TRUE)) {
3394  crm_trace("Remapping %d to %d", PCMK_OCF_DEGRADED_MASTER, PCMK_OCF_RUNNING_MASTER);
3395  record_failed_op(xml_op, node, rsc, data_set);
3396  }
3397  }
3398 
3399  if (expired && target_rc != rc) {
3400  const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
3401 
3402  pe_rsc_debug(rsc, "Expired operation '%s' on %s returned '%s' (%d) instead of the expected value: '%s' (%d)",
3403  key, node->details->uname,
3404  services_ocf_exitcode_str(rc), rc,
3405  services_ocf_exitcode_str(target_rc), target_rc);
3406 
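 /* The recorded failure is old enough that it should no longer count,
  * yet its rc still differs from what is expected now: a one-off
  * operation can simply be ignored, while an expired recurring-monitor
  * failure must be re-detected, so force that operation to be re-run. */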
3407  if(interval == 0) {
3408  crm_notice("Ignoring expired calculated failure %s (rc=%d, magic=%s) on %s",
3409  task_key, rc, magic, node->details->uname);
3410  goto done;
3411 
3412  } else if(node->details->online && node->details->unclean == FALSE) {
3413  crm_notice("Re-initiated expired calculated failure %s (rc=%d, magic=%s) on %s",
3414  task_key, rc, magic, node->details->uname);
3415  /* This is SO horrible, but we don't have access to CancelXmlOp() yet */
3416  crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
3417  goto done;
3418  }
3419  }
3420 
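 /* A raw done/error status only says whether the agent call finished;
  * comparing rc against the expected result refines it into one of the
  * more specific statuses handled in the switch below. */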
3421  if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
3422  status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
3423  }
3424 
3425  pe_rsc_trace(rsc, "Handling status: %d", status);
3426  switch (status) {
3427  case PCMK_LRM_OP_CANCELLED:
3428  /* do nothing?? */
3429  pe_err("Don't know what to do for cancelled ops yet");
3430  break;
3431 
3432  case PCMK_LRM_OP_PENDING:
3433  if (safe_str_eq(task, CRMD_ACTION_START)) {
3434  set_bit(rsc->flags, pe_rsc_start_pending);
3435  set_active(rsc);
3436 
3437  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
3438  rsc->role = RSC_ROLE_MASTER;
3439 
3440  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
3441  /* If a pending migrate_to action is out on an unclean node,
3442  * we have to force the stop action on the target. */
3443  const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
3444  node_t *target = pe_find_node(data_set->nodes, migrate_target);
3445  if (target) {
3446  stop_action(rsc, target, FALSE);
3447  }
3448  }
3449 
3450  if (rsc->pending_task == NULL) {
3451  if (safe_str_eq(task, CRMD_ACTION_STATUS) && interval == 0) {
3452  /* Pending probes are not printed, even if pending
3453  * operations are requested. If someone ever requests that
3454  * behavior, uncomment this and the corresponding part of
3455  * native.c:native_pending_task().
3456  */
3457  /*rsc->pending_task = strdup("probe");*/
3458  /*rsc->pending_node = node;*/
3459  } else {
3460  rsc->pending_task = strdup(task);
3461  rsc->pending_node = node;
3462  }
3463  }
3464  break;
3465 
3466  case PCMK_LRM_OP_DONE:
3467  pe_rsc_trace(rsc, "%s/%s completed on %s", rsc->id, task, node->details->uname);
3468  update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
3469  break;
3470 
3471  case PCMK_LRM_OP_NOT_INSTALLED:
3472  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3473  if (failure_strategy == action_fail_ignore) {
3474  crm_warn("Cannot ignore failed %s (status=%d, rc=%d) on %s: "
3475  "Resource agent doesn't exist",
3476  task_key, status, rc, node->details->uname);
3477  /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
3478  *on_fail = action_fail_migrate;
3479  }
3480  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3481  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3482  break;
3483 
3484  case PCMK_LRM_OP_ERROR:
3485  case PCMK_LRM_OP_ERROR_HARD:
3486  case PCMK_LRM_OP_ERROR_FATAL:
3487  case PCMK_LRM_OP_TIMEOUT:
3488  case PCMK_LRM_OP_NOTSUPPORTED:
3489 
3490  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
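 /* A failure is treated as a success when its on-fail policy is
  * "ignore", or when it is a failed stop that will be resolved by
  * restarting the resource's container (on-fail "restart-container"). */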
3491  if ((failure_strategy == action_fail_ignore)
3492  || (failure_strategy == action_fail_restart_container
3493  && safe_str_eq(task, CRMD_ACTION_STOP))) {
3494 
3495  crm_warn("Pretending the failure of %s (rc=%d) on %s succeeded",
3496  task_key, rc, node->details->uname);
3497 
3498  update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
3499  crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
3500  set_bit(rsc->flags, pe_rsc_failure_ignored);
3501 
3502  record_failed_op(xml_op, node, rsc, data_set);
3503 
3504  if (failure_strategy == action_fail_restart_container && *on_fail <= action_fail_recover) {
3505  *on_fail = failure_strategy;
3506  }
3507 
3508  } else {
3509  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3510 
3511  if(status == PCMK_LRM_OP_ERROR_HARD) {
3512  do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
3513  "Preventing %s from re-starting on %s: operation %s failed '%s' (%d)",
3514  parent->id, node->details->uname,
3515  task, services_ocf_exitcode_str(rc), rc);
3516 
3517  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3518 
3519  } else if(status == PCMK_LRM_OP_ERROR_FATAL) {
3520  crm_err("Preventing %s from re-starting anywhere: operation %s failed '%s' (%d)",
3521  parent->id, task, services_ocf_exitcode_str(rc), rc);
3522 
3523  resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
3524  }
3525  }
3526  break;
3527  }
3528 
3529  done:
3530  pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s", rsc->id, task, role2text(rsc->role), role2text(rsc->next_role));
3531  return TRUE;
3532 }
3533 
3534 gboolean
3535 add_node_attrs(xmlNode * xml_obj, node_t * node, gboolean overwrite, pe_working_set_t * data_set)
3536 {
3537  const char *cluster_name = NULL;
3538 
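 /* Seed the built-in node attributes (#uname, #id, #is_dc, and
  * optionally #cluster-name and #site-name) before unpacking any
  * user-defined attribute sets. */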
3539  g_hash_table_insert(node->details->attrs,
3540  strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
3541 
3542  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
3543  strdup(node->details->id));
3544  if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
3545  data_set->dc_node = node;
3546  node->details->is_dc = TRUE;
3547  g_hash_table_insert(node->details->attrs,
3548  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
3549  } else {
3550  g_hash_table_insert(node->details->attrs,
3551  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
3552  }
3553 
3554  cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
3555  if (cluster_name) {
3556  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
3557  strdup(cluster_name));
3558  }
3559 
3560  unpack_instance_attributes(data_set->input, xml_obj, XML_TAG_ATTR_SETS, NULL,
3561  node->details->attrs, NULL, overwrite, data_set->now);
3562 
3563  if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
3564  const char *site_name = pe_node_attribute_raw(node, "site-name");
3565 
3566  if (site_name) {
3567  g_hash_table_insert(node->details->attrs,
3568  strdup(CRM_ATTR_SITE_NAME),
3569  strdup(site_name));
3570 
3571  } else if (cluster_name) {
3572  /* Default to cluster-name if unset */
3573  g_hash_table_insert(node->details->attrs,
3574  strdup(CRM_ATTR_SITE_NAME),
3575  strdup(cluster_name));
3576  }
3577  }
3578  return TRUE;
3579 }
3580 
3581 static GListPtr
3582 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
3583 {
3584  int counter = -1;
3585  int stop_index = -1;
3586  int start_index = -1;
3587 
3588  xmlNode *rsc_op = NULL;
3589 
3590  GListPtr gIter = NULL;
3591  GListPtr op_list = NULL;
3592  GListPtr sorted_op_list = NULL;
3593 
3594  /* extract operations */
3595  op_list = NULL;
3596  sorted_op_list = NULL;
3597 
3598  for (rsc_op = __xml_first_child_element(rsc_entry);
3599  rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
3600  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
3601  crm_xml_add(rsc_op, "resource", rsc);
3602  crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
3603  op_list = g_list_prepend(op_list, rsc_op);
3604  }
3605  }
3606 
3607  if (op_list == NULL) {
3608  /* if there are no operations, there is nothing to do */
3609  return NULL;
3610  }
3611 
3612  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
3613 
3614  /* create active recurring operations as optional */
3615  if (active_filter == FALSE) {
3616  return sorted_op_list;
3617  }
3618 
3619  op_list = NULL;
3620 
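 /* Determine where the most recent start and stop sit in the sorted
  * history, so that only operations from the current "active" era are
  * kept; anything older than the last start is filtered out below. */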
3621  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
3622 
3623  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
3624  xmlNode *rsc_op = (xmlNode *) gIter->data;
3625 
3626  counter++;
3627 
3628  if (start_index < stop_index) {
3629  crm_trace("Skipping %s: not active", ID(rsc_entry));
3630  break;
3631 
3632  } else if (counter < start_index) {
3633  crm_trace("Skipping %s: old", ID(rsc_op));
3634  continue;
3635  }
3636  op_list = g_list_append(op_list, rsc_op);
3637  }
3638 
3639  g_list_free(sorted_op_list);
3640  return op_list;
3641 }
3642 
3643 GListPtr
3644 find_operations(const char *rsc, const char *node, gboolean active_filter,
3645  pe_working_set_t * data_set)
3646 {
3647  GListPtr output = NULL;
3648  GListPtr intermediate = NULL;
3649 
3650  xmlNode *tmp = NULL;
3651  xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
3652 
3653  node_t *this_node = NULL;
3654 
3655  xmlNode *node_state = NULL;
3656 
3657  for (node_state = __xml_first_child_element(status); node_state != NULL;
3658  node_state = __xml_next_element(node_state)) {
3659 
3660  if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
3661  const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
3662 
3663  if (node != NULL && safe_str_neq(uname, node)) {
3664  continue;
3665  }
3666 
3667  this_node = pe_find_node(data_set->nodes, uname);
3668  if(this_node == NULL) {
3669  CRM_LOG_ASSERT(this_node != NULL);
3670  continue;
3671 
3672  } else if (is_remote_node(this_node)) {
3673  determine_remote_online_status(data_set, this_node);
3674 
3675  } else {
3676  determine_online_status(node_state, this_node, data_set);
3677  }
3678 
3679  if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
3680  /* Offline nodes run no resources...
3681  * unless stonith is enabled, in which case we need to
3682  * make sure rsc start events happen after the stonith
3683  */
3684  xmlNode *lrm_rsc = NULL;
3685 
3686  tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
3687  tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
3688 
3689  for (lrm_rsc = __xml_first_child_element(tmp); lrm_rsc != NULL;
3690  lrm_rsc = __xml_next_element(lrm_rsc)) {
3691  if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
3692 
3693  const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
3694 
3695  if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
3696  continue;
3697  }
3698 
3699  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
3700  output = g_list_concat(output, intermediate);
3701  }
3702  }
3703  }
3704  }
3705  }
3706 
3707  return output;
3708 }
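 /* A minimal usage sketch (not part of unpack.c): assuming a
  * pe_working_set_t that was already populated from a CIB, a caller
  * could list the active operations recorded for one resource on one
  * node. "my-rsc" and "node1" are illustrative values.
  *
  * GListPtr ops = find_operations("my-rsc", "node1", TRUE, data_set);
  *
  * for (GListPtr iter = ops; iter != NULL; iter = iter->next) {
  *     xmlNode *op = (xmlNode *) iter->data;
  *
  *     crm_info("%s on %s", crm_element_value(op, XML_LRM_ATTR_TASK),
  *              crm_element_value(op, XML_ATTR_UNAME));
  * }
  * g_list_free(ops); // entries point into the CIB; free only the list
  */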