corosync  3.1.0
exec/votequorum.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2020 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Authors: Christine Caulfield (ccaulfie@redhat.com)
7  * Fabio M. Di Nitto (fdinitto@redhat.com)
8  *
9  * This software licensed under BSD license, the text of which follows:
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are met:
13  *
14  * - Redistributions of source code must retain the above copyright notice,
15  * this list of conditions and the following disclaimer.
16  * - Redistributions in binary form must reproduce the above copyright notice,
17  * this list of conditions and the following disclaimer in the documentation
18  * and/or other materials provided with the distribution.
19  * - Neither the name of the MontaVista Software, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived from this
21  * software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33  * THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include <config.h>
37 
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <fcntl.h>
41 #include <stdint.h>
42 #include <unistd.h>
43 
44 #include <qb/qblist.h>
45 #include <qb/qbipc_common.h>
46 
47 #include "quorum.h"
48 #include <corosync/corodefs.h>
49 #include <corosync/logsys.h>
50 #include <corosync/coroapi.h>
51 #include <corosync/icmap.h>
52 #include <corosync/votequorum.h>
54 
55 #include "service.h"
56 #include "util.h"
57 
59 
60 /*
61  * interface with corosync
62  */
63 
/* Pointer to the corosync API table (struct corosync_api_v1). */
64 static struct corosync_api_v1 *corosync_api;
65 
66 /*
67  * votequorum global config vars
68  */
69 
70 
/*
 * Quorum device (qdevice) settings and state.
 * qdevice points at the cluster_node that represents the quorum device.
 * qdevice_can_operate and qdevice_master_wins are mirrored into icmap by
 * update_qdevice_can_operate() and update_qdevice_master_wins().
 */
71 static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
72 static struct cluster_node *qdevice = NULL;
73 static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
74 static unsigned int qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
75 static uint8_t qdevice_can_operate = 1;
76 static void *qdevice_reg_conn = NULL;
77 static uint8_t qdevice_master_wins = 0;
78 
/*
 * When set, calculate_quorum() forces the quorum to 1 as long as it counts
 * at most two voting members.
 */
79 static uint8_t two_node = 0;
80 
/*
 * While wait_for_all and wait_for_all_status are both set, are_we_quorate()
 * refuses quorum until total votes equal our expected_votes.
 * wait_for_all_status is changed through update_wait_for_all_status().
 */
81 static uint8_t wait_for_all = 0;
82 static uint8_t wait_for_all_status = 0;
83 static uint8_t wait_for_all_autoset = 0; /* Wait for all is not set explicitly and follows two_node */
84 
/*
 * Auto tie-breaker mode, selected by parse_atb_string(). lowest_node_id and
 * highest_node_id stay at -1 until get_lowest_node_id() /
 * get_highest_node_id() recompute them.
 */
85 static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE, initial_auto_tie_breaker = ATB_NONE;
86 static int lowest_node_id = -1;
87 static int highest_node_id = -1;
88 
/*
 * Default for last_man_standing_window. The unit is not visible in this
 * section (presumably milliseconds -- TODO confirm).
 */
89 #define DEFAULT_LMS_WIN 10000
90 static uint8_t last_man_standing = 0;
91 static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
92 
/*
 * With allow_downscale set, calculate_quorum() raises a non-zero
 * max_expected to at least ev_barrier when a decrease is allowed.
 * ev_barrier is set via update_ev_barrier().
 */
93 static uint8_t allow_downscale = 0;
94 static uint32_t ev_barrier = 0;
95 
/*
 * Expected-votes tracking: ev_tracking_barrier is stored in the
 * "<state dir>/ev_tracking" file, accessed through ev_tracking_fd
 * (see load_ev_tracking_barrier() and update_ev_tracking_barrier()).
 */
96 static uint8_t ev_tracking = 0;
97 static uint32_t ev_tracking_barrier = 0;
98 static int ev_tracking_fd = -1;
99 
100 /*
101  * votequorum_exec defines/structs/forward definitions
102  */
103 
/*
 * Four packed request structures, each starting with an 8-byte aligned
 * qb_ipc_request_header.
 *
 * NOTE(review): the "struct <tag> {" opening line of every structure is
 * missing from this listing (the embedded numbering skips 104, 112, 122 and
 * 128), and so are the members at 125 and 130-131. The tags are presumably
 * req_exec_quorum_* (cf. the "used by req_exec_quorum_qdevice_reg" comment
 * further down) -- confirm against the real file before editing.
 */
/* First structure: per-node id, votes, expected votes and flags. */
105  struct qb_ipc_request_header header __attribute__((aligned(8)));
106  uint32_t nodeid;
107  uint32_t votes;
108  uint32_t expected_votes;
109  uint32_t flags;
110 } __attribute__((packed));
111 
/*
 * Second structure: nodeid/value/param, the same triple that
 * votequorum_exec_send_reconfigure() takes (presumably its wire format --
 * TODO confirm). _pad0.._pad2 pad the one-byte param to four bytes.
 */
113  struct qb_ipc_request_header header __attribute__((aligned(8)));
114  uint32_t nodeid;
115  uint32_t value;
116  uint8_t param;
117  uint8_t _pad0;
118  uint8_t _pad1;
119  uint8_t _pad2;
120 } __attribute__((packed));
121 
/* Third structure: carries an operation code; at least one member is missing here. */
123  struct qb_ipc_request_header header __attribute__((aligned(8)));
124  uint32_t operation;
126 } __attribute__((packed));
127 
/* Fourth structure: only the header is visible; its remaining members are missing here. */
129  struct qb_ipc_request_header header __attribute__((aligned(8)));
132 } __attribute__((packed));
133 
134 /*
135  * votequorum_exec onwire version (via totem)
136  */
137 
138 #include "votequorum.h"
139 
140 /*
141  * votequorum_exec onwire messages (via totem)
142  */
143 
/*
 * Message ids for the totem-side handlers. The values follow the order of
 * the entries in votequorum_exec_engine[] (nodeinfo, reconfigure,
 * qdevice_reg, qdevice_reconfigure).
 */
144 #define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0
145 #define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1
146 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2
147 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3
148 
/* Notification senders; defined outside the visible part of the file. */
149 static void votequorum_exec_send_expectedvotes_notification(void);
150 static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context);
151 static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context);
152 
/* Values for the "param" argument of votequorum_exec_send_reconfigure(). */
153 #define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1
154 #define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2
155 #define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA 3
156 
157 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value);
158 
159 /*
160  * used by req_exec_quorum_qdevice_reg
161  */
162 #define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0
163 #define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1
164 
165 /*
166  * votequorum internal node status/view
167  */
168 
/*
 * Bit flags stored in cluster_node.flags; each is a distinct power of two
 * so they can be OR-ed together. decode_flags() prints all eight.
 */
169 #define NODE_FLAGS_QUORATE 1
170 #define NODE_FLAGS_LEAVING 2
171 #define NODE_FLAGS_WFASTATUS 4
172 #define NODE_FLAGS_FIRST 8
173 #define NODE_FLAGS_QDEVICE_REGISTERED 16
174 #define NODE_FLAGS_QDEVICE_ALIVE 32
175 #define NODE_FLAGS_QDEVICE_CAST_VOTE 64
176 #define NODE_FLAGS_QDEVICE_MASTER_WINS 128
177 
/*
 * NOTE(review): this listing is truncated. The enumerators and the closing
 * line of the typedef (listing lines 179-182) are missing; elsewhere in the
 * file the values NODESTATE_MEMBER and NODESTATE_DEAD are compared against
 * cluster_node.state, so they presumably belong to this enum.
 */
178 typedef enum {
183 
/*
 * One entry per known cluster node, linked into cluster_members_list via
 * "list".
 * NOTE(review): the "state" member (listing line 186) is missing from this
 * extract although the rest of the file reads node->state.
 */
184 struct cluster_node {
185  int node_id;
187  uint32_t votes;
188  uint32_t expected_votes;
189  uint32_t flags;
190  struct qb_list_head list;
191 };
192 
193 /*
194  * votequorum internal quorum status
195  */
196 
/*
 * quorum: votes needed to be quorate, as last computed by
 * calculate_quorum() (assigned in recalculate_quorum()).
 * cluster_is_quorate: result of the last are_we_quorate() evaluation.
 * NOTE(review): quorum is only 8 bits wide while calculate_quorum() works
 * with unsigned int values, so a quorum above 255 would be truncated on
 * assignment -- confirm this cannot happen in practice.
 */
197 static uint8_t quorum;
198 static uint8_t cluster_is_quorate;
199 
200 /*
201  * votequorum membership data
202  */
203 
/*
 * us: cluster_node for the local node.
 * cluster_members_list: list of cluster_node entries filled by
 * node_add_ordered().
 * quorum_members / previous_quorum_members / atb_nodelist: node-id arrays
 * with their fill counts in the matching *_entries variables.
 */
204 static struct cluster_node *us;
205 static struct qb_list_head cluster_members_list;
206 static unsigned int quorum_members[PROCESSOR_COUNT_MAX];
207 static unsigned int previous_quorum_members[PROCESSOR_COUNT_MAX];
208 static unsigned int atb_nodelist[PROCESSOR_COUNT_MAX];
209 static int quorum_members_entries = 0;
210 static int previous_quorum_members_entries = 0;
211 static int atb_nodelist_entries = 0;
212 static struct memb_ring_id quorum_ringid;
213 
214 /*
215  * pre allocate all cluster_nodes + one for qdevice
216  */
/*
 * Backing storage handed out by allocate_node(); cluster_nodes_entries is
 * the index of the next unused slot.
 */
217 static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2];
218 static int cluster_nodes_entries = 0;
219 
220 /*
221  * votequorum tracking
222  */
/*
 * Per-connection private data; its size is registered as
 * private_data_size in votequorum_service_engine.
 * NOTE(review): listing lines 225-226 (further members) are missing from
 * this extract.
 */
223 struct quorum_pd {
224  unsigned char track_flags;
227  struct qb_list_head list;
228  void *conn;
229 };
230 
/* List head for quorum_pd entries (linked through quorum_pd.list, presumably -- TODO confirm). */
231 static struct qb_list_head trackers_list;
232 
233 /*
234  * votequorum timers
235  */
236 
/*
 * Timer handles, each paired with an int flag whose name suggests it
 * records whether the timer is currently armed (the code using them is
 * outside this section).
 */
237 static corosync_timer_handle_t qdevice_timer;
238 static int qdevice_timer_set = 0;
239 static corosync_timer_handle_t last_man_standing_timer;
240 static int last_man_standing_timer_set = 0;
/* Sync-phase bookkeeping flags; their users are outside this section. */
241 static int sync_nodeinfo_sent = 0;
242 static int sync_wait_for_poll_or_timeout = 0;
243 
244 /*
245  * Service Interfaces required by service_message_handler struct
246  */
247 
/*
 * Non-zero suppresses the quorum callback/notification in are_we_quorate().
 */
248 static int sync_in_progress = 0;
249 
/* Sync callbacks wired into votequorum_service_engine. */
250 static void votequorum_sync_init (
251  const unsigned int *trans_list,
252  size_t trans_list_entries,
253  const unsigned int *member_list,
254  size_t member_list_entries,
255  const struct memb_ring_id *ring_id);
256 
257 static int votequorum_sync_process (void);
258 static void votequorum_sync_activate (void);
259 static void votequorum_sync_abort (void);
260 
/* Called by are_we_quorate() whenever the quorate state changes outside of sync. */
261 static quorum_set_quorate_fn_t quorum_callback;
262 
263 /*
264  * votequorum_exec handler and definitions
265  */
266 
267 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
268 static int votequorum_exec_exit_fn (void);
269 static int votequorum_exec_send_nodeinfo(uint32_t nodeid);
270 
/*
 * One handler plus one endian-conversion routine per message type; the
 * pairs are registered in votequorum_exec_engine[].
 */
271 static void message_handler_req_exec_votequorum_nodeinfo (
272  const void *message,
273  unsigned int nodeid);
274 static void exec_votequorum_nodeinfo_endian_convert (void *message);
275 
276 static void message_handler_req_exec_votequorum_reconfigure (
277  const void *message,
278  unsigned int nodeid);
279 static void exec_votequorum_reconfigure_endian_convert (void *message);
280 
281 static void message_handler_req_exec_votequorum_qdevice_reg (
282  const void *message,
283  unsigned int nodeid);
284 static void exec_votequorum_qdevice_reg_endian_convert (void *message);
285 
286 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
287  const void *message,
288  unsigned int nodeid);
289 static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message);
290 
/*
 * Dispatch table for totem-delivered messages. The slot index of each
 * entry equals the matching MESSAGE_REQ_EXEC_VOTEQUORUM_* value, so the
 * order must not change independently of those defines.
 */
291 static struct corosync_exec_handler votequorum_exec_engine[] =
292 {
293  { /* 0 */
294  .exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo,
295  .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert
296  },
297  { /* 1 */
298  .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure,
299  .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert
300  },
301  { /* 2 */
302  .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg,
303  .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert
304  },
305  { /* 3 */
306  .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure,
307  .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert
308  },
309 };
310 
311 /*
312  * Library Handler and Functions Definitions
313  */
314 
/* Connection setup/teardown callbacks registered in votequorum_service_engine. */
315 static int quorum_lib_init_fn (void *conn);
316 
317 static int quorum_lib_exit_fn (void *conn);
318 
319 static void qdevice_timer_fn(void *arg);
320 
/*
 * Library request handlers. Each takes the client connection and the raw
 * request message; they are registered, in this order, in
 * quorum_lib_service[].
 */
321 static void message_handler_req_lib_votequorum_getinfo (void *conn,
322  const void *message);
323 
324 static void message_handler_req_lib_votequorum_setexpected (void *conn,
325  const void *message);
326 
327 static void message_handler_req_lib_votequorum_setvotes (void *conn,
328  const void *message);
329 
330 static void message_handler_req_lib_votequorum_trackstart (void *conn,
331  const void *message);
332 
333 static void message_handler_req_lib_votequorum_trackstop (void *conn,
334  const void *message);
335 
336 static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
337  const void *message);
338 
339 static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
340  const void *message);
341 
342 static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
343  const void *message);
344 
345 static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
346  const void *message);
347 
348 static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
349  const void *message);
350 
/*
 * Dispatch table for library (IPC) requests, ten entries indexed 0-9.
 * NOTE(review): one initializer line per entry (listing lines 355, 359,
 * ... 391) is missing from this extract, which is why every .lib_handler_fn
 * line ends in a dangling comma here.
 */
351 static struct corosync_lib_handler quorum_lib_service[] =
352 {
353  { /* 0 */
354  .lib_handler_fn = message_handler_req_lib_votequorum_getinfo,
356  },
357  { /* 1 */
358  .lib_handler_fn = message_handler_req_lib_votequorum_setexpected,
360  },
361  { /* 2 */
362  .lib_handler_fn = message_handler_req_lib_votequorum_setvotes,
364  },
365  { /* 3 */
366  .lib_handler_fn = message_handler_req_lib_votequorum_trackstart,
368  },
369  { /* 4 */
370  .lib_handler_fn = message_handler_req_lib_votequorum_trackstop,
372  },
373  { /* 5 */
374  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register,
376  },
377  { /* 6 */
378  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister,
380  },
381  { /* 7 */
382  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update,
384  },
385  { /* 8 */
386  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll,
388  },
389  { /* 9 */
390  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins,
392  }
393 };
394 
/*
 * Service descriptor tying together the library dispatch table, the exec
 * dispatch table and the sync callbacks. The *_count fields are derived
 * from the table sizes, so adding a table entry needs no change here.
 */
395 static struct corosync_service_engine votequorum_service_engine = {
396  .name = "corosync vote quorum service v1.0",
397  .id = VOTEQUORUM_SERVICE,
398  .priority = 2,
399  .private_data_size = sizeof (struct quorum_pd),
400  .allow_inquorate = CS_LIB_ALLOW_INQUORATE,
401  .flow_control = COROSYNC_LIB_FLOW_CONTROL_REQUIRED,
402  .lib_init_fn = quorum_lib_init_fn,
403  .lib_exit_fn = quorum_lib_exit_fn,
404  .lib_engine = quorum_lib_service,
405  .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
406  .exec_init_fn = votequorum_exec_init_fn,
407  .exec_exit_fn = votequorum_exec_exit_fn,
408  .exec_engine = votequorum_exec_engine,
409  .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
410  .sync_init = votequorum_sync_init,
411  .sync_process = votequorum_sync_process,
412  .sync_activate = votequorum_sync_activate,
413  .sync_abort = votequorum_sync_abort
414 };
415 
/*
 * Accessor returning the address of votequorum_service_engine.
 * NOTE(review): its signature line (listing line 416) is missing from this
 * extract.
 */
417 {
418  return (&votequorum_service_engine);
419 }
420 
/*
 * Default-service entry for "corosync_votequorum".
 * NOTE(review): listing line 425 (another initializer) is missing here.
 */
421 static struct default_service votequorum_service[] = {
422  {
423  .name = "corosync_votequorum",
424  .ver = 0,
426  },
427 };
428 
429 /*
430  * common/utility macros/functions
431  */
432 
/*
 * Larger of two values; when they compare equal the second argument is the
 * result. Both arguments are expanded twice, so do not pass expressions
 * with side effects.
 */
#define max(x, y) (((y) < (x)) ? (x) : (y))
434 
435 static void node_add_ordered(struct cluster_node *newnode)
436 {
437  struct cluster_node *node = NULL;
438  struct qb_list_head *tmp;
439 
440  ENTER();
441 
442  qb_list_for_each(tmp, &cluster_members_list) {
443  node = qb_list_entry(tmp, struct cluster_node, list);
444  if (newnode->node_id < node->node_id) {
445  break;
446  }
447  }
448 
449  if (!node) {
450  qb_list_add(&newnode->list, &cluster_members_list);
451  } else {
452  qb_list_add_tail(&newnode->list, &node->list);
453  }
454 
455  LEAVE();
456 }
457 
/*
 * Obtain a zeroed cluster_node for nodeid.
 *
 * Slots are taken from the static cluster_nodes[] array while any are left;
 * after that the first list entry in NODESTATE_DEAD state is unlinked and
 * recycled. Returns the node, or NULL when the recycle path finds the
 * member list empty.
 *
 * NOTE(review): listing line 487 is missing from this extract; it opens the
 * block that is closed at 489 and guards the node_add_ordered() call, so
 * the condition under which the node is linked into the list is not visible
 * here.
 */
458 static struct cluster_node *allocate_node(unsigned int nodeid)
459 {
460  struct cluster_node *cl = NULL;
461  struct qb_list_head *tmp;
462 
463  ENTER();
464 
/* cluster_nodes[] has PROCESSOR_COUNT_MAX+2 slots, so index PROCESSOR_COUNT_MAX+1 is the last valid one. */
465  if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) {
466  cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries];
467  cluster_nodes_entries++;
468  } else {
/*
 * NOTE(review): if no entry is DEAD the loop finishes with cl still pointing
 * at the last (live) entry and tmp at the list head, so the !cl check below
 * only catches an empty list -- confirm whether a "found" flag is needed.
 */
469  qb_list_for_each(tmp, &cluster_members_list) {
470  cl = qb_list_entry(tmp, struct cluster_node, list);
471  if (cl->state == NODESTATE_DEAD) {
472  break;
473  }
474  }
475  /*
476  * this should never happen
477  */
478  if (!cl) {
479  log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node " CS_PRI_NODE_ID " data!!", nodeid);
480  goto out;
481  }
482  qb_list_del(tmp);
483  }
484 
/* Wipe the slot (fresh or recycled) before filling it in. */
485  memset(cl, 0, sizeof(struct cluster_node));
486  cl->node_id = nodeid;
488  node_add_ordered(cl);
489  }
490 
491 out:
492  LEAVE();
493 
494  return cl;
495 }
496 
/*
 * Look up the cluster_node for nodeid.
 *
 * Returns "us" when nodeid is our own id, "qdevice" for the case handled by
 * the second branch, otherwise the matching entry of cluster_members_list,
 * or NULL when nothing matches.
 *
 * NOTE(review): listing line 509 (the condition of the branch that returns
 * qdevice) is missing from this extract.
 */
497 static struct cluster_node *find_node_by_nodeid(unsigned int nodeid)
498 {
499  struct cluster_node *node;
500  struct qb_list_head *tmp;
501 
502  ENTER();
503 
/* node_id is an int; the comparison with the unsigned nodeid is done in unsigned arithmetic. */
504  if (nodeid == us->node_id) {
505  LEAVE();
506  return us;
507  }
508 
510  LEAVE();
511  return qdevice;
512  }
513 
514  qb_list_for_each(tmp, &cluster_members_list) {
515  node = qb_list_entry(tmp, struct cluster_node, list);
516  if (node->node_id == nodeid) {
517  LEAVE();
518  return node;
519  }
520  }
521 
522  LEAVE();
523  return NULL;
524 }
525 
526 static void get_lowest_node_id(void)
527 {
528  struct cluster_node *node = NULL;
529  struct qb_list_head *tmp;
530 
531  ENTER();
532 
533  lowest_node_id = us->node_id;
534 
535  qb_list_for_each(tmp, &cluster_members_list) {
536  node = qb_list_entry(tmp, struct cluster_node, list);
537  if ((node->state == NODESTATE_MEMBER) &&
538  (node->node_id < lowest_node_id)) {
539  lowest_node_id = node->node_id;
540  }
541  }
542  log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: " CS_PRI_NODE_ID " us: " CS_PRI_NODE_ID, lowest_node_id, us->node_id);
543  icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id);
544 
545  LEAVE();
546 }
547 
548 static void get_highest_node_id(void)
549 {
550  struct cluster_node *node = NULL;
551  struct qb_list_head *tmp;
552 
553  ENTER();
554 
555  highest_node_id = us->node_id;
556 
557  qb_list_for_each(tmp, &cluster_members_list) {
558  node = qb_list_entry(tmp, struct cluster_node, list);
559  if ((node->state == NODESTATE_MEMBER) &&
560  (node->node_id > highest_node_id)) {
561  highest_node_id = node->node_id;
562  }
563  }
564  log_printf(LOGSYS_LEVEL_DEBUG, "highest node id: " CS_PRI_NODE_ID " us: " CS_PRI_NODE_ID, highest_node_id, us->node_id);
565  icmap_set_uint32("runtime.votequorum.highest_node_id", highest_node_id);
566 
567  LEAVE();
568 }
569 
570 static int check_low_node_id_partition(void)
571 {
572  struct cluster_node *node = NULL;
573  struct qb_list_head *tmp;
574  int found = 0;
575 
576  ENTER();
577 
578  qb_list_for_each(tmp, &cluster_members_list) {
579  node = qb_list_entry(tmp, struct cluster_node, list);
580  if ((node->state == NODESTATE_MEMBER) &&
581  (node->node_id == lowest_node_id)) {
582  found = 1;
583  }
584  }
585 
586  LEAVE();
587  return found;
588 }
589 
590 static int check_high_node_id_partition(void)
591 {
592  struct cluster_node *node = NULL;
593  struct qb_list_head *tmp;
594  int found = 0;
595 
596  ENTER();
597 
598  qb_list_for_each(tmp, &cluster_members_list) {
599  node = qb_list_entry(tmp, struct cluster_node, list);
600  if ((node->state == NODESTATE_MEMBER) &&
601  (node->node_id == highest_node_id)) {
602  found = 1;
603  }
604  }
605 
606  LEAVE();
607  return found;
608 }
609 
/*
 * Linear search for nodeid in the first "entries" elements of members.
 *
 * Returns 1 when found, 0 otherwise (also when entries is <= 0).
 */
static int is_in_nodelist(int nodeid, unsigned int *members, int entries)
{
	int idx = 0;
	int found = 0;

	ENTER();

	while (!found && idx < entries) {
		found = (nodeid == members[idx]);
		idx++;
	}

	LEAVE();
	return found;
}
624 
625 /*
626  * The algorithm for a list of tie-breaker nodes is:
627  * travel the list of nodes in the auto_tie_breaker list,
628  * if the node IS in our current partition, check if the
629  * nodes earlier in the atb list are in the 'previous' partition;
630  * If none are found then we are safe to be quorate, if any are
631  * then we cannot be as we don't know if that node is up or down.
632  * If we don't have a node in the current list we are NOT quorate.
633  * Obviously if we find the first node in the atb list in our
634  * partition then we are quorate.
635  *
636  * Special cases lowest nodeid, and highest nodeid are handled separately.
637  */
638 static int check_auto_tie_breaker(void)
639 {
640  int i, j;
641  int res;
642  ENTER();
643 
644  if (auto_tie_breaker == ATB_LOWEST) {
645  res = check_low_node_id_partition();
646  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LOWEST decision: %d", res);
647  LEAVE();
648  return res;
649  }
650  if (auto_tie_breaker == ATB_HIGHEST) {
651  res = check_high_node_id_partition();
652  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_HIGHEST decision: %d", res);
653  LEAVE();
654  return res;
655  }
656 
657  /* Assume ATB_LIST, we should never be called for ATB_NONE */
658  for (i=0; i < atb_nodelist_entries; i++) {
659  if (is_in_nodelist(atb_nodelist[i], quorum_members, quorum_members_entries)) {
660  /*
661  * Node is in our partition, if any of its predecessors are
662  * in the previous quorum partition then it might be in the
663  * 'other half' (as we've got this far without seeing it here)
664  * and so we can't be quorate.
665  */
666  for (j=0; j<i; j++) {
667  if (is_in_nodelist(atb_nodelist[j], previous_quorum_members, previous_quorum_members_entries)) {
668  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node " CS_PRI_NODE_ID " in previous partition but not here, quorum denied", atb_nodelist[j]);
669  LEAVE();
670  return 0;
671  }
672  }
673 
674  /*
675  * None of the other list nodes were in the previous partition, if there
676  * are enough votes, we can be quorate
677  */
678  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node " CS_PRI_NODE_ID " in current partition, we can be quorate", atb_nodelist[i]);
679  LEAVE();
680  return 1;
681  }
682  }
683  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found no list nodes in current partition, we cannot be quorate");
684  LEAVE();
685  return 0;
686 }
687 
688 /*
689  * atb_string can be either:
690  * 'lowest'
691  * 'highest'
692  * a list of nodeids
693  */
694 static void parse_atb_string(char *atb_string)
695 {
696  char *ptr;
697  long num;
698 
699  ENTER();
700  auto_tie_breaker = ATB_NONE;
701 
702  if (!strcmp(atb_string, "lowest"))
703  auto_tie_breaker = ATB_LOWEST;
704 
705  if (!strcmp(atb_string, "highest"))
706  auto_tie_breaker = ATB_HIGHEST;
707 
708  if (atoi(atb_string)) {
709 
710  atb_nodelist_entries = 0;
711  ptr = atb_string;
712  do {
713  num = strtol(ptr, &ptr, 10);
714  if (num) {
715  log_printf(LOGSYS_LEVEL_DEBUG, "ATB nodelist[%d] = %d", atb_nodelist_entries, num);
716  atb_nodelist[atb_nodelist_entries++] = num;
717  }
718  } while (num);
719 
720  if (atb_nodelist_entries) {
721  auto_tie_breaker = ATB_LIST;
722  }
723  }
724  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
725  log_printf(LOGSYS_LEVEL_DEBUG, "ATB type = %d", auto_tie_breaker);
726 
727  /* Make sure we got something */
728  if (auto_tie_breaker == ATB_NONE) {
729  log_printf(LOGSYS_LEVEL_WARNING, "auto_tie_breaker_nodes is not valid. It must be 'lowest', 'highest' or a space-separated list of node IDs. auto_tie_breaker is disabled");
730  auto_tie_breaker = ATB_NONE;
731  }
732  LEAVE();
733 }
734 
/*
 * Scan cluster_members_list for a NODESTATE_MEMBER entry that satisfies an
 * additional condition and return 1 if one exists, 0 otherwise.
 * are_we_quorate() uses the result to grant quorum when
 * qdevice_master_wins is set.
 *
 * NOTE(review): listing lines 746-747 (the rest of the if condition) are
 * missing from this extract, so the exact criterion cannot be documented
 * here -- presumably it tests qdevice-related node flags; confirm against
 * the real file.
 */
735 static int check_qdevice_master(void)
736 {
737  struct cluster_node *node = NULL;
738  struct qb_list_head *tmp;
739  int found = 0;
740 
741  ENTER();
742 
743  qb_list_for_each(tmp, &cluster_members_list) {
744  node = qb_list_entry(tmp, struct cluster_node, list);
745  if ((node->state == NODESTATE_MEMBER) &&
748  found = 1;
749  }
750  }
751 
752  LEAVE();
753  return found;
754 }
755 
/*
 * Emit a one-line Yes/No breakdown of all eight NODE_FLAGS_* bits in flags.
 *
 * NOTE(review): listing line 760 (the start of the logging call that takes
 * the format string and arguments below) is missing from this extract.
 */
756 static void decode_flags(uint32_t flags)
757 {
758  ENTER();
759 
761  "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s",
762  (flags & NODE_FLAGS_QUORATE)?"Yes":"No",
763  (flags & NODE_FLAGS_LEAVING)?"Yes":"No",
764  (flags & NODE_FLAGS_WFASTATUS)?"Yes":"No",
765  (flags & NODE_FLAGS_FIRST)?"Yes":"No",
766  (flags & NODE_FLAGS_QDEVICE_REGISTERED)?"Yes":"No",
767  (flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No",
768  (flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No",
769  (flags & NODE_FLAGS_QDEVICE_MASTER_WINS)?"Yes":"No");
770 
771  LEAVE();
772 }
773 
774 /*
775  * load/save are copied almost pristine from totemsrp.c
776  */
/*
 * Load ev_tracking_barrier from "<state dir>/ev_tracking".
 *
 * If the file exists and holds a full uint32_t, that value is used.
 * Otherwise the barrier is reset to 0 and the file is (re)created with that
 * value. Returns 0 on success (including a short write, which is only
 * logged) and -1 when the file cannot be created.
 *
 * NOTE(review): listing lines 802 and 809 (the start of the two logging
 * calls) are missing from this extract.
 * NOTE(review): both success paths close() the descriptor but leave its
 * number in ev_tracking_fd, which update_ev_tracking_barrier() later passes
 * to lseek()/write(). Unless the file is reopened somewhere outside this
 * section, those calls operate on a closed descriptor -- confirm.
 */
777 static int load_ev_tracking_barrier(void)
778 {
779  int res = 0;
780  char filename[PATH_MAX];
781 
782  ENTER();
783 
784  snprintf(filename, sizeof(filename) - 1, "%s/ev_tracking", get_state_dir());
785 
/* First try: read an existing file. */
786  ev_tracking_fd = open(filename, O_RDWR, 0700);
787  if (ev_tracking_fd != -1) {
788  res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t));
789  close(ev_tracking_fd);
790  if (res == sizeof (uint32_t)) {
791  LEAVE();
792  return 0;
793  }
794  }
795 
/*
 * Missing or short file: start from 0 and create it. umask(0) changes the
 * process-wide file mode mask and is not restored in this function.
 */
796  ev_tracking_barrier = 0;
797  umask(0);
798  ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700);
799  if (ev_tracking_fd != -1) {
800  res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
801  if ((res == -1) || (res != sizeof (uint32_t))) {
803  "Unable to write to %s", filename);
804  }
805  close(ev_tracking_fd);
806  LEAVE();
807  return 0;
808  }
810  "Unable to create %s file", filename);
811 
812  LEAVE();
813 
814  return -1;
815 }
816 
/*
 * Set wait_for_all_status to wfa_status, keep our node's
 * NODE_FLAGS_WFASTATUS bit in step with it and publish the value as
 * runtime.votequorum.wait_for_all_status.
 *
 * NOTE(review): listing line 823 (the body of the "status set" branch) is
 * missing from this extract; by symmetry with the else branch it presumably
 * sets NODE_FLAGS_WFASTATUS in us->flags -- confirm.
 */
817 static void update_wait_for_all_status(uint8_t wfa_status)
818 {
819  ENTER();
820 
821  wait_for_all_status = wfa_status;
822  if (wait_for_all_status) {
824  } else {
825  us->flags &= ~NODE_FLAGS_WFASTATUS;
826  }
827  icmap_set_uint8("runtime.votequorum.wait_for_all_status",
828  wait_for_all_status);
829 
830  LEAVE();
831 }
832 
833 static void update_two_node(void)
834 {
835  ENTER();
836 
837  icmap_set_uint8("runtime.votequorum.two_node", two_node);
838 
839  LEAVE();
840 }
841 
842 static void update_ev_barrier(uint32_t expected_votes)
843 {
844  ENTER();
845 
846  ev_barrier = expected_votes;
847  icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier);
848 
849  LEAVE();
850 }
851 
852 static void update_qdevice_can_operate(uint8_t status)
853 {
854  ENTER();
855 
856  qdevice_can_operate = status;
857  icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate);
858 
859  LEAVE();
860 }
861 
862 static void update_qdevice_master_wins(uint8_t allow)
863 {
864  ENTER();
865 
866  qdevice_master_wins = allow;
867  icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins);
868 
869  LEAVE();
870 }
871 
/*
 * Set ev_tracking_barrier to ev_t_barrier, publish it as
 * runtime.votequorum.ev_tracking_barrier and rewrite the on-disk copy at
 * offset 0 of ev_tracking_fd, followed by a data sync.
 *
 * The in-memory value and the icmap key are updated even if the disk
 * update fails; failures are only logged.
 *
 * NOTE(review): listing lines 882 and 890 (the start of the two logging
 * calls) are missing from this extract.
 */
872 static void update_ev_tracking_barrier(uint32_t ev_t_barrier)
873 {
874  int res;
875 
876  ENTER();
877 
878  ev_tracking_barrier = ev_t_barrier;
879  icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier);
880 
/* Rewind so the single uint32_t in the file is overwritten rather than appended to. */
881  if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) {
883  "Unable to update ev_tracking_barrier on disk data!!!");
884  LEAVE();
885  return;
886  }
887 
888  res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
889  if (res != sizeof (uint32_t)) {
891  "Unable to update ev_tracking_barrier on disk data!!!");
892  }
/* The sync below runs even after a failed or short write; its return value is ignored. */
893 #ifdef HAVE_FDATASYNC
894  fdatasync(ev_tracking_fd);
895 #else
896  fsync(ev_tracking_fd);
897 #endif
898 
899  LEAVE();
900 }
901 
902 /*
903  * quorum calculation core bits
904  */
905 
/*
 * Compute the number of votes required for quorum.
 *
 * allow_decrease:  non-zero lets the result drop below the current global
 *                  "quorum" value.
 * max_expected:    when non-zero, overrides the highest expected_votes seen
 *                  among members.
 * ret_total_votes: if not NULL, receives the total votes counted.
 *
 * Returns the new quorum value; the global "quorum" is not modified here.
 *
 * NOTE(review): listing line 934 (the condition guarding the qdevice block)
 * is missing from this extract.
 */
906 static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsigned int *ret_total_votes)
907 {
908  struct qb_list_head *nodelist;
909  struct cluster_node *node;
910  unsigned int total_votes = 0;
911  unsigned int highest_expected = 0;
912  unsigned int newquorum, q1, q2;
913  unsigned int total_nodes = 0;
914 
915  ENTER();
916 
/* With downscaling enabled, never let a caller-supplied expectation drop below ev_barrier. */
917  if ((allow_downscale) && (allow_decrease) && (max_expected)) {
918  max_expected = max(ev_barrier, max_expected);
919  }
920 
/* Sum votes and track the highest expected_votes over nodes in MEMBER state only. */
921  qb_list_for_each(nodelist, &cluster_members_list) {
922  node = qb_list_entry(nodelist, struct cluster_node, list);
923 
924  log_printf(LOGSYS_LEVEL_DEBUG, "node " CS_PRI_NODE_ID " state=%d, votes=%u, expected=%u",
925  node->node_id, node->state, node->votes, node->expected_votes);
926 
927  if (node->state == NODESTATE_MEMBER) {
928  highest_expected = max(highest_expected, node->expected_votes);
929  total_votes += node->votes;
930  total_nodes++;
931  }
932  }
933 
/* The quorum device contributes its votes and counts as one more node. */
935  log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes);
936  total_votes += qdevice->votes;
937  total_nodes++;
938  }
939 
940  if (max_expected > 0) {
941  highest_expected = max_expected;
942  }
943 
944  /*
945  * This quorum calculation is taken from the OpenVMS Cluster Systems
946  * manual, but, then, you guessed that didn't you
947  */
/* Integer division: (x + 2) / 2 equals floor(x / 2) + 1, i.e. a strict majority of x. */
948  q1 = (highest_expected + 2) / 2;
949  q2 = (total_votes + 2) / 2;
950  newquorum = max(q1, q2);
951 
952  /*
953  * Normally quorum never decreases but the system administrator can
954  * force it down by setting expected votes to a maximum value
955  */
956  if (!allow_decrease) {
957  newquorum = max(quorum, newquorum);
958  }
959 
960  /*
961  * The special two_node mode allows each of the two nodes to retain
962  * quorum if the other fails. Only one of the two should live past
963  * fencing (as both nodes try to fence each other in split-brain.)
964  * Also: if there are more than two nodes, force us inquorate to avoid
965  * any damage or confusion.
966  */
/* total_nodes includes the quorum device when the block above counted it. */
967  if (two_node && total_nodes <= 2) {
968  newquorum = 1;
969  }
970 
971  if (ret_total_votes) {
972  *ret_total_votes = total_votes;
973  }
974 
975  LEAVE();
976  return newquorum;
977 }
978 
979 static void update_node_expected_votes(int new_expected_votes)
980 {
981  struct qb_list_head *nodelist;
982  struct cluster_node *node;
983 
984  if (new_expected_votes) {
985  qb_list_for_each(nodelist, &cluster_members_list) {
986  node = qb_list_entry(nodelist, struct cluster_node, list);
987 
988  if (node->state == NODESTATE_MEMBER) {
989  node->expected_votes = new_expected_votes;
990  }
991  }
992  }
993 }
994 
/*
 * Re-evaluate whether this partition is quorate given total_votes, update
 * cluster_is_quorate and our NODE_FLAGS_QUORATE bit, refresh the
 * wait_for_all status and, if the state changed and no sync is in
 * progress, invoke quorum_callback and send a quorum notification.
 *
 * NOTE(review): listing line 1008 (the start of the logging call in the
 * wait-for-all branch) is missing from this extract.
 * NOTE(review): the early return in that branch skips LEAVE(), unlike every
 * other exit path in this file.
 */
995 static void are_we_quorate(unsigned int total_votes)
996 {
997  int quorate;
998  int quorum_change = 0;
999 
1000  ENTER();
1001 
1002  /*
1003  * wait for all nodes to show up before granting quorum
1004  */
1005 
1006  if ((wait_for_all) && (wait_for_all_status)) {
1007  if (total_votes != us->expected_votes) {
1009  "Waiting for all cluster members. "
1010  "Current votes: %d expected_votes: %d",
1011  total_votes, us->expected_votes);
1012  assert(!cluster_is_quorate);
1013  return;
1014  }
1015  update_wait_for_all_status(0);
1016  }
1017 
/* Plain vote count check; the lowest/highest node ids are only refreshed when it passes. */
1018  if (quorum > total_votes) {
1019  quorate = 0;
1020  } else {
1021  quorate = 1;
1022  get_lowest_node_id();
1023  get_highest_node_id();
1024  }
1025 
/* The tie-breaker can only grant quorum here; it never takes it away. */
1026  if ((auto_tie_breaker != ATB_NONE) &&
1027  /* Must be a half (or half-1) split */
1028  (total_votes == (us->expected_votes / 2)) &&
1029  /* If the 'other' partition in a split might have quorum then we can't run ATB */
1030  (previous_quorum_members_entries - quorum_members_entries < quorum) &&
1031  (check_auto_tie_breaker() == 1)) {
1032  quorate = 1;
1033  }
1034 
1035  if ((qdevice_master_wins) &&
1036  (!quorate) &&
1037  (check_qdevice_master() == 1)) {
1038  log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition");
1039  quorate = 1;
1040  }
1041 
/* Detect a transition in either direction against the previous state. */
1042  if (cluster_is_quorate && !quorate) {
1043  quorum_change = 1;
1044  log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity");
1045  }
1046  if (!cluster_is_quorate && quorate) {
1047  quorum_change = 1;
1048  log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity");
1049  }
1050 
1051  cluster_is_quorate = quorate;
1052  if (cluster_is_quorate) {
1053  us->flags |= NODE_FLAGS_QUORATE;
1054  } else {
1055  us->flags &= ~NODE_FLAGS_QUORATE;
1056  }
1057 
/* With wait_for_all enabled, losing quorum re-arms the wait-for-all status. */
1058  if (wait_for_all) {
1059  if (quorate) {
1060  update_wait_for_all_status(0);
1061  } else {
1062  update_wait_for_all_status(1);
1063  }
1064  }
1065 
1066  if ((quorum_change) &&
1067  (sync_in_progress == 0)) {
1068  quorum_callback(quorum_members, quorum_members_entries,
1069  cluster_is_quorate, &quorum_ringid);
1070  votequorum_exec_send_quorum_notification(NULL, 0L);
1071  }
1072 
1073  LEAVE();
1074 }
1075 
1076 static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members)
1077 {
1078  unsigned int total_votes = 0;
1079  unsigned int cluster_members = 0;
1080  struct qb_list_head *nodelist;
1081  struct cluster_node *node;
1082 
1083  ENTER();
1084 
1085  qb_list_for_each(nodelist, &cluster_members_list) {
1086  node = qb_list_entry(nodelist, struct cluster_node, list);
1087  if (node->state == NODESTATE_MEMBER) {
1088  cluster_members++;
1089  total_votes += node->votes;
1090  }
1091  }
1092 
1093  if (qdevice->votes) {
1094  total_votes += qdevice->votes;
1095  cluster_members++;
1096  }
1097 
1098  *totalvotes = total_votes;
1099  *current_members = cluster_members;
1100 
1101  LEAVE();
1102 }
1103 
1104 /*
1105  * Recalculate cluster quorum, set quorate and notify changes
1106  */
1107 static void recalculate_quorum(int allow_decrease, int by_current_nodes)
1108 {
1109  unsigned int total_votes = 0;
1110  unsigned int cluster_members = 0;
1111 
1112  ENTER();
1113 
1114  get_total_votes(&total_votes, &cluster_members);
1115 
1116  if (!by_current_nodes) {
1117  cluster_members = 0;
1118  }
1119 
1120  /*
1121  * Keep expected_votes at the highest number of votes in the cluster
1122  */
1123  log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes);
1124  if (total_votes > us->expected_votes) {
1125  us->expected_votes = total_votes;
1126  votequorum_exec_send_expectedvotes_notification();
1127  }
1128 
1129  if ((ev_tracking) &&
1130  (us->expected_votes > ev_tracking_barrier)) {
1131  update_ev_tracking_barrier(us->expected_votes);
1132  }
1133 
1134  quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
1135  update_node_expected_votes(cluster_members);
1136 
1137  are_we_quorate(total_votes);
1138 
1139  LEAVE();
1140 }
1141 
1142 /*
1143  * configuration bits and pieces
1144  */
1145 
/*
 * Walk the "nodelist.node.*" icmap keys and collect vote information.
 *
 * votes          - set to this node's votes, but only when a node position
 *                  equal to "nodelist.local_node_pos" is encountered
 * nodes          - set to the number of distinct node positions seen
 * expected_votes - set to the sum of the votes of all nodes; a node without
 *                  a readable "quorum_votes" key counts as 1 vote
 *
 * Returns 0 when "nodelist.local_node_pos" cannot be read, 1 otherwise.
 */
1146 static int votequorum_read_nodelist_configuration(uint32_t *votes,
1147  uint32_t *nodes,
1148  uint32_t *expected_votes)
1149 {
1150  icmap_iter_t iter;
1151  const char *iter_key;
1152  char tmp_key[ICMAP_KEYNAME_MAXLEN];
/* last_node_pos starts at (uint32_t)-1 so that the first node never matches it */
1153  uint32_t our_pos, node_pos, last_node_pos=-1;
1154  uint32_t nodecount = 0;
1155  uint32_t nodelist_expected_votes = 0;
1156  uint32_t node_votes = 0;
1157  int res = 0;
1158 
1159  ENTER();
1160 
1161  if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) {
/* NOTE(review): listing line 1162 is absent from this extract; the string
 * below presumably belongs to a logging call - confirm against upstream.
 * NOTE(review): this early return does not call LEAVE(). */
1163  "No nodelist defined or our node is not in the nodelist");
1164  return 0;
1165  }
1166 
1167  iter = icmap_iter_init("nodelist.node.");
1168 
1169  while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
1170 
/* NOTE(review): "%s" carries no field width; this relies on icmap key names
 * fitting into tmp_key (ICMAP_KEYNAME_MAXLEN bytes) - confirm. */
1171  res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
1172  if (res != 2) {
1173  continue;
1174  }
1175 
1176  /*
1177  * If current node_pos is the same as the last_node_pos then skip it
1178  * so we only do the code below once per node.
1179  * (icmap keys are always in order)
1180  */
1181  if (last_node_pos == node_pos) {
1182  continue;
1183  }
1184  last_node_pos = node_pos;
1185 
1186  nodecount++;
1187 
/* tmp_key is reused here to build the per-node quorum_votes key */
1188  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos);
1189  if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) {
1190  node_votes = 1;
1191  }
1192 
1193  nodelist_expected_votes = nodelist_expected_votes + node_votes;
1194 
1195  if (node_pos == our_pos) {
1196  *votes = node_votes;
1197  }
1198  }
1199 
1200  *expected_votes = nodelist_expected_votes;
1201  *nodes = nodecount;
1202 
1203  icmap_iter_finalize(iter);
1204 
1205  LEAVE();
1206 
1207  return 1;
1208 }
1209 
1210 static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes)
1211 {
1212  char *qdevice_model = NULL;
1213  int ret = 0;
1214 
1215  ENTER();
1216 
1217  if (icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) {
1218  if (strlen(qdevice_model)) {
1219  if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) {
1220  *qdevice_votes = -1;
1221  }
1222  if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) {
1223  qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
1224  }
1225  if (icmap_get_uint32("quorum.device.sync_timeout", &qdevice_sync_timeout) != CS_OK) {
1226  qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
1227  }
1228  update_qdevice_can_operate(1);
1229  ret = 1;
1230  }
1231 
1232  free(qdevice_model);
1233  }
1234 
1235  LEAVE();
1236 
1237  return ret;
1238 }
1239 
1240 #define VOTEQUORUM_READCONFIG_STARTUP 0
1241 #define VOTEQUORUM_READCONFIG_RUNTIME 1
1242 
/*
 * Read the votequorum related configuration from icmap, verify it and apply
 * it to the module state (us->votes, us->expected_votes, qdevice->votes,
 * two_node, wait_for_all, auto_tie_breaker, ...).
 *
 * runtime - VOTEQUORUM_READCONFIG_STARTUP (0) or VOTEQUORUM_READCONFIG_RUNTIME (1).
 *           At startup configuration conflicts are returned as an error
 *           string; at runtime they are logged and the offending feature is
 *           disabled instead.
 *
 * Returns NULL on success, otherwise a pointer to a string literal that
 * describes the configuration error (the caller must not free it).
 */
1243 static char *votequorum_readconfig(int runtime)
1244 {
1245  uint32_t node_votes = 0, qdevice_votes = 0;
1246  uint32_t node_expected_votes = 0, expected_votes = 0;
1247  uint32_t node_count = 0;
1248  uint8_t atb = 0;
1249  int have_nodelist, have_qdevice;
1250  char *atb_string = NULL;
1251  char *error = NULL;
1252 
1253  ENTER();
1254 
1255  log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime);
1256 
1257  /*
1258  * Set the few things we re-read at runtime back to their defaults
1259  */
1260  if (runtime) {
1261  two_node = 0;
1262  expected_votes = 0;
1263  /* auto_tie_breaker cannot be changed by config reload, but
1264  * we automatically disable it on odd-sized clusters without
1265  * wait_for_all.
1266  * We may need to re-enable it when membership changes to ensure
1267  * that auto_tie_breaker is consistent across all nodes */
1268  auto_tie_breaker = initial_auto_tie_breaker;
1269  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1270  }
1271 
1272  /*
1273  * gather basic data here
1274  */
1275  (void)icmap_get_uint32("quorum.expected_votes", &expected_votes);
1276  have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
1277  have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
1278  (void)icmap_get_uint8("quorum.two_node", &two_node);
1279 
1280  /*
1281  * do config verification and enablement
1282  */
1283 
1284  if ((!have_nodelist) && (!expected_votes)) {
1285  if (!runtime) {
1286  error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!";
1287  } else {
1288  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!");
1289  log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data");
1290  }
1291  goto out;
1292  }
1293 
1294  /*
1295  * two_node and qdevice are not compatible in the same config.
1296  * try to make an educated guess of what to do
1297  */
1298 
1299  if ((two_node) && (have_qdevice)) {
1300  if (!runtime) {
1301  error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!";
1302  goto out;
1303  } else {
1304  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!");
/* NOTE(review): listing line 1305 is absent from this extract; judging by the
 * two log messages it presumably tests whether a quorum device is currently
 * registered - confirm against upstream. */
1306  log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node");
1307  two_node = 0;
1308  } else {
1309  log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node");
1310  update_qdevice_can_operate(0);
1311  }
1312  }
1313  }
1314 
1315  /*
1316  * Enable special features
1317  */
/* these options are only read at startup, never on a runtime reload */
1318  if (!runtime) {
1319  (void)icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
/* remember that wait_for_all was not set explicitly, so that two_node may
 * drive it further below */
1320  if (icmap_get_uint8("quorum.wait_for_all", &wait_for_all) != CS_OK) {
1321  wait_for_all_autoset = 1;
1322  }
1323  (void)icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
1324  (void)icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
1325  (void)icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
1326  (void)icmap_get_uint8("quorum.auto_tie_breaker", &atb);
1327  (void)icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);
1328 
1329  /* auto_tie_breaker defaults to LOWEST */
1330  if (atb) {
1331  auto_tie_breaker = ATB_LOWEST;
1332  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1333  }
1334  else {
1335  auto_tie_breaker = ATB_NONE;
1336  if (atb_string) {
/* NOTE(review): listing line 1337 is absent from this extract; the string
 * below presumably belongs to a logging call - confirm against upstream. */
1338  "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0");
1339  }
1340  }
1341 
1342  if (atb && atb_string) {
1343  parse_atb_string(atb_string);
1344  }
/* atb_string is NULL when the key was not read; free(NULL) is a no-op */
1345  free(atb_string);
1346  initial_auto_tie_breaker = auto_tie_breaker;
1347 
1348  /* allow_downscale requires ev_tracking */
1349  if (allow_downscale) {
1350  ev_tracking = 1;
1351  }
1352 
1353  if (ev_tracking) {
1354  if (load_ev_tracking_barrier() < 0) {
1355  LEAVE();
1356  return ((char *)"Unable to load ev_tracking file!");
1357  }
1358  update_ev_tracking_barrier(ev_tracking_barrier);
1359  }
1360 
1361  }
1362 
1363  /*
1364  * Changing of wait_for_all during runtime is not supported, but changing of two_node is
1365  * and two_node may set wfa if not configured explicitly. It is safe to unset it
1366  * (or set it back) when two_node changes.
1367  */
1368  if (wait_for_all_autoset) {
1369  wait_for_all = two_node;
1370  }
1371 
1372  /* two_node and auto_tie_breaker are not compatible as two_node uses
1373  * a fence race to decide quorum whereas ATB decides based on node id
1374  */
1375  if (two_node && auto_tie_breaker != ATB_NONE) {
1376  log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible.");
1377  log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf");
1378  two_node = 0;
1379  }
1380 
1381  /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs
1382  * to be set so that an isolated half+1 without the tie breaker node
1383  * does not have quorum on reboot.
1384  */
/* NOTE(review): the oddness test is applied to node_expected_votes (the sum
 * of nodelist votes), not to node_count - confirm this is intended. */
1385  if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) &&
1386  (!wait_for_all)) {
1387  if (last_man_standing) {
1388  /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what
1389  * they might want so we'll just quit.
1390  */
1391  log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n");
1392  log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n");
1393  log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n");
1394  log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n");
1395  log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. Please fix your corosync.conf\n");
1396  error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster";
1397  goto out;
1398  }
1399  else {
1400  log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n");
1401  log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n");
1402  log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n");
1403  auto_tie_breaker = ATB_NONE;
1404  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1405  }
1406  }
1407 
1408  /*
1409  * quorum device is not compatible with last_man_standing and auto_tie_breaker
1410  * neither lms or atb can be set at runtime, so there is no need to check for
1411  * runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have
1412  * been enabled at startup.
1413  */
1414 
1415  if ((have_qdevice) && (last_man_standing)) {
1416  if (!runtime) {
1417  error = (char *)"configuration error: quorum.device is not compatible with last_man_standing";
1418  goto out;
1419  } else {
1420  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing");
1421  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1422  update_qdevice_can_operate(0);
1423  }
1424  }
1425 
1426  if ((have_qdevice) && (auto_tie_breaker != ATB_NONE)) {
1427  if (!runtime) {
1428  error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker";
1429  goto out;
1430  } else {
1431  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker");
1432  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1433  update_qdevice_can_operate(0);
1434  }
1435  }
1436 
1437  if ((have_qdevice) && (allow_downscale)) {
1438  if (!runtime) {
1439  error = (char *)"configuration error: quorum.device is not compatible with allow_downscale";
1440  goto out;
1441  } else {
1442  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with allow_downscale");
1443  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1444  update_qdevice_can_operate(0);
1445  }
1446  }
1447 
1448  /*
1449  * if user specifies quorum.expected_votes + quorum.device but NOT the device.votes
1450  * we don't know what the quorum device should vote.
1451  */
1452 
/* qdevice_votes is a uint32_t; -1 is the "not specified" sentinel stored by
 * votequorum_qdevice_is_configured() */
1453  if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) {
1454  if (!runtime) {
1455  error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set";
1456  goto out;
1457  } else {
1458  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set");
1459  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1460  update_qdevice_can_operate(0);
1461  }
1462  }
1463 
1464  /*
1465  * if user specifies a node list with uneven votes and no device.votes
1466  * we cannot autocalculate the votes
1467  */
1468 
1469  if ((have_qdevice) &&
1470  (qdevice_votes == -1) &&
1471  (have_nodelist) &&
1472  (node_count != node_expected_votes)) {
1473  if (!runtime) {
1474  error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1";
1475  goto out;
1476  } else {
1477  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1");
1478  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1479  update_qdevice_can_operate(0);
1480  }
1481  }
1482 
1483  /*
1484  * validate quorum device votes vs expected_votes
1485  */
1486 
/* NOTE(review): qdevice_votes is unsigned, so the (uint32_t)-1 sentinel also
 * satisfies "> 0" here; delta is an unsigned difference stored in an int -
 * confirm the intended behaviour for the sentinel case. */
1487  if ((qdevice_votes > 0) && (expected_votes)) {
1488  int delta = expected_votes - qdevice_votes;
1489  if (delta < 2) {
1490  if (!runtime) {
1491  error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low";
1492  goto out;
1493  } else {
1494  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low");
1495  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1496  update_qdevice_can_operate(0);
1497  }
1498  }
1499  }
1500 
1501  /*
1502  * automatically calculate device votes and adjust expected_votes from nodelist
1503  */
1504 
/* only reached with every node voting 1: the device gets (nodes - 1) votes
 * and expected votes grow by the same amount */
1505  if ((have_qdevice) &&
1506  (qdevice_votes == -1) &&
1507  (!expected_votes) &&
1508  (have_nodelist) &&
1509  (node_count == node_expected_votes)) {
1510  qdevice_votes = node_expected_votes - 1;
1511  node_expected_votes = node_expected_votes + qdevice_votes;
1512  }
1513 
1514  /*
1515  * set this node votes and expected_votes
1516  */
1517  log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes);
1518 
1519  if (ev_tracking) {
1520  expected_votes = ev_tracking_barrier;
1521  }
1522 
1523  if (have_nodelist) {
1524  us->votes = node_votes;
1525  us->expected_votes = node_expected_votes;
1526  } else {
/* without a nodelist this node defaults to 1 vote unless "quorum.votes" is set */
1527  us->votes = 1;
1528  (void)icmap_get_uint32("quorum.votes", &us->votes);
1529  }
1530 
1531  if (expected_votes) {
/* NOTE(review): listing line 1532 is absent from this extract, so the body of
 * this 'if' is not visible; presumably it applies expected_votes to
 * us->expected_votes - confirm against upstream. */
1533  }
1534 
1535  /*
1536  * set qdevice votes
1537  */
1538 
1539  if (!have_qdevice) {
1540  qdevice->votes = 0;
1541  }
1542 
1543  if (qdevice_votes != -1) {
1544  qdevice->votes = qdevice_votes;
1545  }
1546 
1547  update_ev_barrier(us->expected_votes);
1548  update_two_node();
1549  if (wait_for_all) {
1550  if (!runtime) {
1551  update_wait_for_all_status(1);
1552  }
1553  } else if (wait_for_all_autoset && wait_for_all_status) {
1554  /*
1555  * Reset wait for all status for consistency when wfa is auto-unset by 2node.
1556  * wait_for_all_status would be ignored by are_we_quorate anyway.
1557  */
1558  update_wait_for_all_status(0);
1559  }
1560 
1561 out:
1562  LEAVE();
1563  return error;
1564 }
1565 
1566 static void votequorum_refresh_config(
1567  int32_t event,
1568  const char *key_name,
1569  struct icmap_notify_value new_val,
1570  struct icmap_notify_value old_val,
1571  void *user_data)
1572 {
1573  int old_votes, old_expected_votes;
1574  uint8_t reloading;
1575  uint8_t cancel_wfa;
1576  int32_t reload_status;
1577 
1578  ENTER();
1579 
1580  /*
1581  * If a full reload is in progress then don't do anything until it's done and
1582  * can reconfigure it all atomically
1583  */
1584  if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) {
1585  return;
1586  }
1587 
1588  /* If a full reload failed, then don't reconfigure */
1589  if ( (strcmp(key_name, "config.totemconfig_reload_in_progress") == 0) &&
1590  (icmap_get_int32("config.reload_status", &reload_status) == CS_OK) &&
1591  (reload_status != CS_OK) ) {
1592  return;
1593  }
1594 
1595  (void)icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
1596  if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 &&
1597  cancel_wfa >= 1) {
1598  icmap_set_uint8("quorum.cancel_wait_for_all", 0);
1599  if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA,
1600  us->node_id, 0)) {
1601  log_printf(LOGSYS_LEVEL_ERROR, "Failed to send Cancel WFA message to other nodes");
1602  }
1603  return;
1604  }
1605 
1606  old_votes = us->votes;
1607  old_expected_votes = us->expected_votes;
1608 
1609  /*
1610  * Reload the configuration
1611  */
1612  votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME);
1613 
1614  /*
1615  * activate new config
1616  */
1617  votequorum_exec_send_nodeinfo(us->node_id);
1618  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
1619  if (us->votes != old_votes) {
1620  if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES,
1621  us->node_id, us->votes)) {
1622  log_printf(LOGSYS_LEVEL_ERROR, "Failed to send new votes message to other nodes");
1623  }
1624  }
1625  if (us->expected_votes != old_expected_votes) {
1626  if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES,
1627  us->node_id, us->expected_votes)) {
1628  log_printf(LOGSYS_LEVEL_ERROR, "Failed to send expected votes message to other nodes");
1629  }
1630  }
1631 
1632  LEAVE();
1633 }
1634 
/*
 * Register votequorum_refresh_config() as icmap tracking callback for the
 * "nodelist." and "quorum." prefixes and for the
 * "config.totemconfig_reload_in_progress" key.
 *
 * The track handles are kept in locals only and the return values of
 * icmap_track_add() are not checked.
 */
1635 static void votequorum_exec_add_config_notification(void)
1636 {
1637  icmap_track_t icmap_track_nodelist = NULL;
1638  icmap_track_t icmap_track_quorum = NULL;
1639  icmap_track_t icmap_track_reload = NULL;
1640 
1641  ENTER();
1642 
1643  icmap_track_add("nodelist.",
/* NOTE(review): listing line 1644 is absent from this extract; presumably the
 * track-type flags argument - confirm against upstream. */
1645  votequorum_refresh_config,
1646  NULL,
1647  &icmap_track_nodelist);
1648 
1649  icmap_track_add("quorum.",
/* NOTE(review): listing line 1650 is absent from this extract (see above). */
1651  votequorum_refresh_config,
1652  NULL,
1653  &icmap_track_quorum);
1654 
1655  icmap_track_add("config.totemconfig_reload_in_progress",
/* NOTE(review): listing line 1656 is absent from this extract (see above). */
1657  votequorum_refresh_config,
1658  NULL,
1659  &icmap_track_reload);
1660 
1661  LEAVE();
1662 }
1663 
1664 /*
1665  * votequorum_exec core
1666  */
1667 
/*
 * Multicast a req_exec_quorum_reconfigure message to the cluster with
 * TOTEM_AGREED ordering and return the result of totem_mcast().
 *
 * NOTE(review): listing lines 1670, 1676-1681 and 1683-1684 are absent from
 * this extract, so the declaration of req_exec_quorum_reconfigure and the
 * code that fills it from param/nodeid/value are not visible here - confirm
 * against upstream.
 */
1668 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value)
1669 {
1671  struct iovec iov[1];
1672  int ret;
1673 
1674  ENTER();
1675 
1682 
1685 
1686  iov[0].iov_base = (void *)&req_exec_quorum_reconfigure;
1687  iov[0].iov_len = sizeof(req_exec_quorum_reconfigure);
1688 
1689  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1690 
1691  LEAVE();
1692  return ret;
1693 }
1694 
/*
 * Multicast a req_exec_quorum_nodeinfo message for the node identified by
 * nodeid with TOTEM_AGREED ordering.
 *
 * Returns -1 when find_node_by_nodeid() does not know the node, otherwise the
 * result of totem_mcast().
 *
 * NOTE(review): listing lines 1697, 1709-1714 and 1718 are absent from this
 * extract, so the message declaration and most of the code that fills it are
 * not visible here - confirm against upstream.
 */
1695 static int votequorum_exec_send_nodeinfo(uint32_t nodeid)
1696 {
1698  struct iovec iov[1];
1699  struct cluster_node *node;
1700  int ret;
1701 
1702  ENTER();
1703 
1704  node = find_node_by_nodeid(nodeid);
1705  if (!node) {
/* NOTE(review): this early return does not call LEAVE() */
1706  return -1;
1707  }
1708 
/* NOTE(review): the statement opening the block closed on listing line 1716
 * is among the missing lines */
1715  decode_flags(node->flags);
1716  }
1717 
1719  req_exec_quorum_nodeinfo.header.size = sizeof(req_exec_quorum_nodeinfo);
1720 
1721  iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo;
1722  iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo);
1723 
1724  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1725 
1726  LEAVE();
1727  return ret;
1728 }
1729 
/*
 * Multicast a req_exec_quorum_qdevice_reconfigure message (quorum device
 * rename from oldname to newname) with TOTEM_AGREED ordering and return the
 * result of totem_mcast().
 *
 * Both names are asserted to be shorter than the corresponding message
 * fields.
 *
 * NOTE(review): listing lines 1732, 1738-1739, 1742 and 1745 are absent from
 * this extract, so the message declaration, header setup and the copies of
 * the two names are not visible here - confirm against upstream.
 */
1730 static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname)
1731 {
1733  struct iovec iov[1];
1734  int ret;
1735 
1736  ENTER();
1737 
1740 
1741  assert(strlen(oldname) < sizeof(req_exec_quorum_qdevice_reconfigure.oldname));
1743 
1744  assert(strlen(newname) < sizeof(req_exec_quorum_qdevice_reconfigure.newname));
1746 
1747  iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure;
1748  iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure);
1749 
1750  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1751 
1752  LEAVE();
1753  return ret;
1754 }
1755 
/*
 * Multicast a req_exec_quorum_qdevice_reg message carrying qdevice_name_req
 * with TOTEM_AGREED ordering and return the result of totem_mcast().
 *
 * The name is asserted to be shorter than the qdevice_name message field
 * before it is copied with strcpy().
 *
 * NOTE(review): listing lines 1758 and 1764-1766 are absent from this
 * extract, so the message declaration and the code that stores 'operation'
 * and the header are not visible here - confirm against upstream.
 */
1756 static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req)
1757 {
1759  struct iovec iov[1];
1760  int ret;
1761 
1762  ENTER();
1763 
1767 
1768  assert(strlen(qdevice_name_req) < sizeof(req_exec_quorum_qdevice_reg.qdevice_name));
1769  strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req);
1770 
1771  iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg;
1772  iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg);
1773 
1774  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1775 
1776  LEAVE();
1777  return ret;
1778 }
1779 
/*
 * Build a quorum notification (quorate flag plus node id and state of every
 * entry on cluster_members_list, optionally followed by the quorum device)
 * and deliver it over IPC.
 *
 * conn    - when non-NULL the notification is sent to this connection only
 *           and the result of ipc_dispatch_send() is returned
 * context - stored in the notification; when conn is NULL it is replaced by
 *           each tracker's own tracking_context
 *
 * With conn == NULL the notification goes to every entry of trackers_list,
 * send results are ignored and 0 is returned.
 */
1780 static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context)
1781 {
1782  struct res_lib_votequorum_quorum_notification *res_lib_votequorum_notification;
1783  struct qb_list_head *tmp;
1784  struct cluster_node *node;
1785  int i = 0;
1786  int cluster_members = 0;
1787  int size;
/* sized for the maximum number of entries, not for the current membership */
1788  char buf[sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)];
1789 
1790  ENTER();
1791 
1792  log_printf(LOGSYS_LEVEL_DEBUG, "Sending quorum callback, quorate = %d", cluster_is_quorate);
1793 
/* first pass: count the entries that will be reported */
1794  qb_list_for_each(tmp, &cluster_members_list) {
1795  node = qb_list_entry(tmp, struct cluster_node, list);
1796  cluster_members++;
1797  }
/* NOTE(review): listing line 1798 is absent from this extract; presumably the
 * condition under which the quorum device is counted - confirm against
 * upstream. */
1799  cluster_members++;
1800  }
1801 
1802  size = sizeof(struct res_lib_votequorum_quorum_notification) + sizeof(struct votequorum_node) * cluster_members;
1803 
1804  res_lib_votequorum_notification = (struct res_lib_votequorum_quorum_notification *)&buf;
1805  res_lib_votequorum_notification->quorate = cluster_is_quorate;
1806  res_lib_votequorum_notification->context = context;
1807  res_lib_votequorum_notification->node_list_entries = cluster_members;
1808  res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION;
1809  res_lib_votequorum_notification->header.size = size;
1810  res_lib_votequorum_notification->header.error = CS_OK;
1811 
1812  /* Send all known nodes and their states */
1813  qb_list_for_each(tmp, &cluster_members_list) {
1814  node = qb_list_entry(tmp, struct cluster_node, list);
1815  res_lib_votequorum_notification->node_list[i].nodeid = node->node_id;
1816  res_lib_votequorum_notification->node_list[i++].state = node->state;
1817  }
/* NOTE(review): listing line 1818 is absent from this extract; presumably the
 * same quorum device condition as in the counting pass - confirm. */
1819  res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID;
1820  res_lib_votequorum_notification->node_list[i++].state = qdevice->state;
1821  }
1822 
1823  /* Send it to all interested parties */
1824  if (conn) {
1825  int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1826  LEAVE();
1827  return ret;
1828  } else {
1829  struct quorum_pd *qpd;
1830 
1831  qb_list_for_each(tmp, &trackers_list) {
1832  qpd = qb_list_entry(tmp, struct quorum_pd, list);
1833  res_lib_votequorum_notification->context = qpd->tracking_context;
1834  corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1835  }
1836  }
1837 
1838  LEAVE();
1839 
1840  return 0;
1841 }
1842 
1843 static int votequorum_exec_send_nodelist_notification(void *conn, uint64_t context)
1844 {
1845  struct res_lib_votequorum_nodelist_notification *res_lib_votequorum_notification;
1846  int i = 0;
1847  int size;
1848  struct qb_list_head *tmp;
1849  char buf[sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries];
1850 
1851  ENTER();
1852 
1853  log_printf(LOGSYS_LEVEL_DEBUG, "Sending nodelist callback. ring_id = " CS_PRI_RING_ID, quorum_ringid.nodeid, quorum_ringid.seq);
1854 
1855  size = sizeof(struct res_lib_votequorum_nodelist_notification) + sizeof(uint32_t) * quorum_members_entries;
1856 
1857  res_lib_votequorum_notification = (struct res_lib_votequorum_nodelist_notification *)&buf;
1858  res_lib_votequorum_notification->node_list_entries = quorum_members_entries;
1859  res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.nodeid;
1860  res_lib_votequorum_notification->ring_id.seq = quorum_ringid.seq;
1861  res_lib_votequorum_notification->context = context;
1862 
1863  for (i=0; i<quorum_members_entries; i++) {
1864  res_lib_votequorum_notification->node_list[i] = quorum_members[i];
1865  }
1866 
1867  res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION;
1868  res_lib_votequorum_notification->header.size = size;
1869  res_lib_votequorum_notification->header.error = CS_OK;
1870 
1871  /* Send it to all interested parties */
1872  if (conn) {
1873  int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1874  LEAVE();
1875  return ret;
1876  } else {
1877  struct quorum_pd *qpd;
1878 
1879  qb_list_for_each(tmp, &trackers_list) {
1880  qpd = qb_list_entry(tmp, struct quorum_pd, list);
1881  res_lib_votequorum_notification->context = qpd->tracking_context;
1882  corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1883  }
1884  }
1885 
1886  LEAVE();
1887 
1888  return 0;
1889 }
1890 
/*
 * Notify the entries of trackers_list about a change of expected votes.
 *
 * NOTE(review): listing lines 1893, 1901-1904 and 1908-1910 are absent from
 * this extract, so the notification message and the per-tracker send are not
 * visible here - confirm against upstream.
 */
1891 static void votequorum_exec_send_expectedvotes_notification(void)
1892 {
1894  struct quorum_pd *qpd;
1895  struct qb_list_head *tmp;
1896 
1897  ENTER();
1898 
1899  log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback");
1900 
1905 
1906  qb_list_for_each(tmp, &trackers_list) {
1907  qpd = qb_list_entry(tmp, struct quorum_pd, list);
1911  }
1912 
1913  LEAVE();
1914 }
1915 
/*
 * Endian conversion hook for the qdevice reconfigure message.
 * Nothing is converted here; the function only emits the ENTER/LEAVE trace.
 */
static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message)
{
	/* the message is intentionally left untouched */
	(void)message;

	ENTER();

	LEAVE();
}
1922 
/*
 * Handle a qdevice rename request received from the cluster: when the
 * message's oldname equals the locally stored qdevice_name, the stored name
 * is cleared and replaced by the message's newname.
 *
 * NOTE(review): listing lines 1927 and 1933-1934 are absent from this
 * extract, so the declaration of req_exec_quorum_qdevice_reconfigure and the
 * remaining log arguments are not visible here - confirm against upstream.
 */
1923 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
1924  const void *message,
1925  unsigned int nodeid)
1926 {
1928 
1929  ENTER();
1930 
1931  log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node " CS_PRI_NODE_ID " [from: %s to: %s]",
1932  nodeid,
1935 
1936  if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) {
1937  log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename");
1938  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
/* NOTE(review): unbounded copy; presumably newname is limited to
 * VOTEQUORUM_QDEVICE_MAX_NAME_LEN by the message layout - confirm. */
1939  strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname);
1940  /*
1941  * TODO: notify qdevices about name change?
1942  * this is not relevant for now and can wait later on since
1943  * qdevices are local only and libvotequorum is not final
1944  */
1945  }
1946 
1947  LEAVE();
1948 }
1949 
/*
 * Endian conversion hook for the qdevice registration message.
 *
 * NOTE(review): listing lines 1952 and 1956 are absent from this extract, so
 * the conversion statement(s) are not visible here - confirm against
 * upstream.
 */
1950 static void exec_votequorum_qdevice_reg_endian_convert (void *message)
1951 {
1953 
1954  ENTER();
1955 
1957 
1958  LEAVE();
1959 }
1960 
/*
 * Handle a qdevice register/unregister request received from the cluster.
 *
 * Visible behaviour: a registration from another node only records the
 * device name if none is stored yet; a registration from this node is
 * answered on qdevice_reg_conn with CS_OK or CS_ERR_EXIST; the second branch
 * clears the stored qdevice_name unless a member node matches a condition
 * that is not visible here.
 *
 * NOTE(review): several listing lines (1965-1966, 1975-1976, 1978, 1980,
 * 1995, 2019, 2021, 2025, 2031-2032, 2037, 2041) are absent from this
 * extract, including the switch/case labels - confirm details against
 * upstream.
 */
1961 static void message_handler_req_exec_votequorum_qdevice_reg (
1962  const void *message,
1963  unsigned int nodeid)
1964 {
1967  int wipe_qdevice_name = 1;
1968  struct cluster_node *node = NULL;
1969  struct qb_list_head *tmp;
1970  cs_error_t error = CS_OK;
1971 
1972  ENTER();
1973 
1974  log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node " CS_PRI_NODE_ID " [%s]",
1977 
1979  {
/* request originated on another node: just remember the device name */
1981  if (nodeid != us->node_id) {
1982  if (!strlen(qdevice_name)) {
1983  log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded");
1984  strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
1985  }
1986  LEAVE();
1987  return;
1988  }
1989 
1990  /*
1991  * protect against the case where we broadcast qdevice registration
1992  * to new members, we receive the message back, but there is no registration
1993  * connection in progress
1994  */
/* NOTE(review): the condition guarding this early return (listing line 1995)
 * is absent from this extract */
1996  LEAVE();
1997  return;
1998  }
1999 
2000  /*
2001  * this should NEVER happen
2002  */
2003  if (!qdevice_reg_conn) {
2004  log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!");
2005  LEAVE();
2006  return;
2007  }
2008 
2009  /*
2010  * registering our own device in this case
2011  */
2012  if (!strlen(qdevice_name)) {
2013  strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
2014  }
2015 
2016  /*
2017  * check if it is our device or something else
2018  */
/* NOTE(review): listing lines 2019, 2021 and 2025 are absent; the visible
 * fragments show a bounded comparison against qdevice_name - confirm. */
2020  qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2022  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2023  votequorum_exec_send_nodeinfo(us->node_id);
2024  } else {
2026  "A new qdevice with different name (new: %s old: %s) is trying to register!",
2027  req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name);
2028  error = CS_ERR_EXIST;
2029  }
2030 
/* answer the pending registration request and forget the connection */
2033  res_lib_votequorum_status.header.error = error;
2034  corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2035  qdevice_reg_conn = NULL;
2036  break;
/* NOTE(review): the case label (listing line 2037) and the second half of
 * the condition below (listing line 2041) are absent from this extract */
2038  qb_list_for_each(tmp, &cluster_members_list) {
2039  node = qb_list_entry(tmp, struct cluster_node, list);
2040  if ((node->state == NODESTATE_MEMBER) &&
2042  wipe_qdevice_name = 0;
2043  }
2044  }
2045 
2046  if (wipe_qdevice_name) {
2047  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2048  }
2049 
2050  break;
2051  }
2052  LEAVE();
2053 }
2054 
2055 static void exec_votequorum_nodeinfo_endian_convert (void *message)
2056 {
2057  struct req_exec_quorum_nodeinfo *nodeinfo = message;
2058 
2059  ENTER();
2060 
2061  nodeinfo->nodeid = swab32(nodeinfo->nodeid);
2062  nodeinfo->votes = swab32(nodeinfo->votes);
2063  nodeinfo->expected_votes = swab32(nodeinfo->expected_votes);
2064  nodeinfo->flags = swab32(nodeinfo->flags);
2065 
2066  LEAVE();
2067 }
2068 
/*
 * Handle a nodeinfo message received from the cluster.
 *
 * Looks up (or allocates) the cluster_node for the node id carried in the
 * message, remembers its previous votes/expected votes/flags/state, applies
 * the new information and calls recalculate_quorum() when the node is new,
 * is this node, carries NODE_FLAGS_FIRST, or any of the remembered values
 * changed. Finally wait_for_all status is cleared when wait_for_all is set
 * and the node is quorate without the WFASTATUS flag.
 *
 * sender_nodeid - node that sent the message (may differ from the node the
 *                 message describes)
 *
 * NOTE(review): several listing lines (2089-2091, 2093, 2120, 2127,
 * 2135-2136, 2148, 2152) are absent from this extract - confirm details
 * against upstream.
 */
2069 static void message_handler_req_exec_votequorum_nodeinfo (
2070  const void *message,
2071  unsigned int sender_nodeid)
2072 {
2073  const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message;
2074  struct cluster_node *node = NULL;
2075  int old_votes;
2076  int old_expected;
2077  uint32_t old_flags;
2078  nodestate_t old_state;
2079  int new_node = 0;
2080  int allow_downgrade = 0;
2081  int by_node = 0;
2082  unsigned int nodeid = req_exec_quorum_nodeinfo->nodeid;
2083 
2084  ENTER();
2085 
2086  log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node " CS_PRI_NODE_ID, sender_nodeid);
2087  log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[" CS_PRI_NODE_ID "]: votes: %d, expected: %d flags: %d",
2088  nodeid,
/* NOTE(review): remaining log arguments and the condition guarding
 * decode_flags() are among the missing listing lines */
2092 
2094  decode_flags(req_exec_quorum_nodeinfo->flags);
2095  }
2096 
2097  node = find_node_by_nodeid(nodeid);
2098  if (!node) {
2099  node = allocate_node(nodeid);
2100  new_node = 1;
2101  }
/* still NULL here means allocate_node() failed */
2102  if (!node) {
2103  corosync_api->error_memory_failure();
2104  LEAVE();
2105  return;
2106  }
2107 
/* snapshot the previous values so that changes can be detected below */
2108  if (new_node) {
2109  old_votes = 0;
2110  old_expected = 0;
2111  old_state = NODESTATE_DEAD;
2112  old_flags = 0;
2113  } else {
2114  old_votes = node->votes;
2115  old_expected = node->expected_votes;
2116  old_state = node->state;
2117  old_flags = node->flags;
2118  }
2119 
/* NOTE(review): the condition opening this block (listing line 2120) is
 * absent; the block ends with 'goto recalculate' and so bypasses the regular
 * node update below */
2121  struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid);
2122 
2123  assert(sender_node != NULL);
2124 
2125  if ((!cluster_is_quorate) &&
2126  (sender_node->flags & NODE_FLAGS_QUORATE)) {
2128  } else {
2129  node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes);
2130  }
2131  goto recalculate;
2132  }
2133 
2134  /* Update node state */
2137  node->state = NODESTATE_MEMBER;
2138 
2139  if (node->flags & NODE_FLAGS_LEAVING) {
2140  node->state = NODESTATE_LEAVING;
2141  allow_downgrade = 1;
2142  by_node = 1;
2143  }
2144 
2145  if ((!cluster_is_quorate) &&
2146  (node->flags & NODE_FLAGS_QUORATE)) {
2147  allow_downgrade = 1;
2149  }
2150 
2151  if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) {
2153  } else {
2154  node->expected_votes = us->expected_votes;
2155  }
2156 
/* NOTE(review): the two adjacent string literals below concatenate without a
 * space ("...when allcluster nodes...") - runtime text, left unchanged */
2157  if ((last_man_standing) && (node->votes > 1)) {
2158  log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
2159  "cluster nodes votes are set to 1. Disabling LMS.");
2160  last_man_standing = 0;
2161  if (last_man_standing_timer_set) {
2162  corosync_api->timer_delete(last_man_standing_timer);
2163  last_man_standing_timer_set = 0;
2164  }
2165  }
2166 
2167 recalculate:
2168  if ((new_node) ||
2169  (nodeid == us->node_id) ||
2170  (node->flags & NODE_FLAGS_FIRST) ||
2171  (old_votes != node->votes) ||
2172  (old_expected != node->expected_votes) ||
2173  (old_flags != node->flags) ||
2174  (old_state != node->state)) {
2175  recalculate_quorum(allow_downgrade, by_node);
2176  }
2177 
2178  if ((wait_for_all) &&
2179  (!(node->flags & NODE_FLAGS_WFASTATUS)) &&
2180  (node->flags & NODE_FLAGS_QUORATE)) {
2181  update_wait_for_all_status(0);
2182  }
2183 
2184  LEAVE();
2185 }
2186 
2187 static void exec_votequorum_reconfigure_endian_convert (void *message)
2188 {
2189  struct req_exec_quorum_reconfigure *reconfigure = message;
2190 
2191  ENTER();
2192 
2193  reconfigure->nodeid = swab32(reconfigure->nodeid);
2194  reconfigure->value = swab32(reconfigure->value);
2195 
2196  LEAVE();
2197 }
2198 
/*
 * Executive-side handler for a reconfigure message delivered from cluster
 * node `nodeid`.
 *
 * NOTE(review): this listing omits several source lines (2203, 2209, 2211,
 * 2213, 2223, 2229, 2233, 2236) - presumably the cast of `message` to
 * req_exec_quorum_reconfigure, the remaining log arguments, the switch
 * statement and its case labels. Confirm against the complete file.
 *
 * Three branches are visible, each ending in `break`:
 *  1. apply the expected-votes value carried by the message, send the
 *     expected-votes notification, update the ev barrier and recalculate
 *     quorum;
 *  2. look up the node named in the message (returning early when it is
 *     unknown) and recalculate quorum;
 *  3. call update_wait_for_all_status(0), log it and recalculate quorum.
 */
2199 static void message_handler_req_exec_votequorum_reconfigure (
2200  const void *message,
2201  unsigned int nodeid)
2202 {
2204  struct cluster_node *node;
2205
2206  ENTER();
2207
2208  log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node " CS_PRI_NODE_ID " for " CS_PRI_NODE_ID,
2210
2212  {
2214  update_node_expected_votes(req_exec_quorum_reconfigure->value);
2215  votequorum_exec_send_expectedvotes_notification();
2216  update_ev_barrier(req_exec_quorum_reconfigure->value);
 /* With tracking on, our expected votes never drop below ev_tracking_barrier */
2217  if (ev_tracking) {
2218  us->expected_votes = max(us->expected_votes, ev_tracking_barrier);
2219  }
2220  recalculate_quorum(1, 0); /* Allow decrease */
2221  break;
2222
2224  node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid);
 /* Unknown node id: leave without recalculating */
2225  if (!node) {
2226  LEAVE();
2227  return;
2228  }
2230  recalculate_quorum(1, 0); /* Allow decrease */
2231  break;
2232
2234  update_wait_for_all_status(0);
2235  log_printf(LOGSYS_LEVEL_INFO, "wait_for_all_status reset by user on node " CS_PRI_NODE_ID ".",
 /* Unlike the two branches above, the first argument is 0 here */
2237  recalculate_quorum(0, 0);
2238
2239  break;
2240
2241  }
2242
2243  LEAVE();
2244 }
2245 
2246 static int votequorum_exec_exit_fn (void)
2247 {
2248  int ret = 0;
2249 
2250  ENTER();
2251 
2252  /*
2253  * tell the other nodes we are leaving
2254  */
2255 
2256  if (allow_downscale) {
2257  us->flags |= NODE_FLAGS_LEAVING;
2258  ret = votequorum_exec_send_nodeinfo(us->node_id);
2259  }
2260 
2261  if ((ev_tracking) && (ev_tracking_fd != -1)) {
2262  close(ev_tracking_fd);
2263  }
2264 
2265 
2266  LEAVE();
2267  return ret;
2268 }
2269 
2270 static void votequorum_set_icmap_ro_keys(void)
2271 {
2272  icmap_set_ro_access("quorum.allow_downscale", CS_FALSE, CS_TRUE);
2273  icmap_set_ro_access("quorum.wait_for_all", CS_FALSE, CS_TRUE);
2274  icmap_set_ro_access("quorum.last_man_standing", CS_FALSE, CS_TRUE);
2275  icmap_set_ro_access("quorum.last_man_standing_window", CS_FALSE, CS_TRUE);
2276  icmap_set_ro_access("quorum.expected_votes_tracking", CS_FALSE, CS_TRUE);
2277  icmap_set_ro_access("quorum.auto_tie_breaker", CS_FALSE, CS_TRUE);
2278  icmap_set_ro_access("quorum.auto_tie_breaker_node", CS_FALSE, CS_TRUE);
2279 }
2280 
2281 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
2282 {
2283  char *error = NULL;
2284 
2285  ENTER();
2286 
2287  /*
2288  * make sure we start clean
2289  */
2290  qb_list_init(&cluster_members_list);
2291  qb_list_init(&trackers_list);
2292  qdevice = NULL;
2293  us = NULL;
2294  memset(cluster_nodes, 0, sizeof(cluster_nodes));
2295 
2296  /*
2297  * Allocate a cluster_node for qdevice
2298  */
2299  qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID);
2300  if (!qdevice) {
2301  LEAVE();
2302  return ((char *)"Could not allocate node.");
2303  }
2304  qdevice->votes = 0;
2305  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2306 
2307  /*
2308  * Allocate a cluster_node for us
2309  */
2310  us = allocate_node(corosync_api->totem_nodeid_get());
2311  if (!us) {
2312  LEAVE();
2313  return ((char *)"Could not allocate node.");
2314  }
2315 
2316  icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id);
2317 
2318  us->state = NODESTATE_MEMBER;
2319  us->votes = 1;
2320  us->flags |= NODE_FLAGS_FIRST;
2321 
2322  error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP);
2323  if (error) {
2324  return error;
2325  }
2326  recalculate_quorum(0, 0);
2327 
2328  /*
2329  * Set RO keys in icmap
2330  */
2331  votequorum_set_icmap_ro_keys();
2332 
2333  /*
2334  * Listen for changes
2335  */
2336  votequorum_exec_add_config_notification();
2337 
2338  /*
2339  * Start us off with one node
2340  */
2341  votequorum_exec_send_nodeinfo(us->node_id);
2342 
2343  LEAVE();
2344 
2345  return (NULL);
2346 }
2347 
2348 /*
2349  * votequorum service core
2350  */
2351 
2352 static void votequorum_last_man_standing_timer_fn(void *arg)
2353 {
2354  ENTER();
2355 
2356  last_man_standing_timer_set = 0;
2357  if (cluster_is_quorate) {
2358  recalculate_quorum(1,1);
2359  }
2360 
2361  LEAVE();
2362 }
2363 
/*
 * Start of a sync round: flags sync_in_progress, marks departed members as
 * dead, optionally (re)arms the last-man-standing timer and installs the new
 * membership and ring id.
 *
 * trans_list / trans_list_entries: not referenced in the visible body.
 * member_list / member_list_entries: node ids of the new membership.
 * ring_id: copied into quorum_ringid.
 *
 * NOTE(review): source line 2428 is missing from this listing; see the note
 * further down.
 */
2364 static void votequorum_sync_init (
2365  const unsigned int *trans_list, size_t trans_list_entries,
2366  const unsigned int *member_list, size_t member_list_entries,
2367  const struct memb_ring_id *ring_id)
2368 {
2369  int i, j;
2370  int found;
2371  int left_nodes;
2372  struct cluster_node *node;
2373
2374  ENTER();
2375
2376  sync_in_progress = 1;
2377  sync_nodeinfo_sent = 0;
2378  sync_wait_for_poll_or_timeout = 0;
2379
 /* NODE_FLAGS_FIRST is cleared once the membership has more than one entry */
2380  if (member_list_entries > 1) {
2381  us->flags &= ~NODE_FLAGS_FIRST;
2382  }
2383
2384  /*
2385  * we don't need to track which nodes have left directly,
2386  * since that info is in the node db, but we need to know
2387  * if somebody has left for last_man_standing
2388  */
2389  left_nodes = 0;
 /* Any current quorum member absent from member_list is marked dead */
2390  for (i = 0; i < quorum_members_entries; i++) {
2391  found = 0;
2392  for (j = 0; j < member_list_entries; j++) {
2393  if (quorum_members[i] == member_list[j]) {
2394  found = 1;
2395  break;
2396  }
2397  }
2398  if (found == 0) {
2399  left_nodes = 1;
2400  node = find_node_by_nodeid(quorum_members[i]);
2401  if (node) {
2402  node->state = NODESTATE_DEAD;
2403  }
2404  }
2405  }
2406
 /*
  * With last_man_standing enabled, restart the LMS timer when either
  * nodes left and the membership still has at least `quorum` entries, or
  * the membership has at most `quorum` entries, a tie breaker is
  * configured and check_low_node_id_partition() returns 1.
  * The window is multiplied by 1000000 before being handed to
  * timer_add_duration - presumably a ms to ns conversion; confirm.
  */
2407  if (last_man_standing) {
2408  if (((member_list_entries >= quorum) && (left_nodes)) ||
2409  ((member_list_entries <= quorum) && (auto_tie_breaker != ATB_NONE) && (check_low_node_id_partition() == 1))) {
2410  if (last_man_standing_timer_set) {
2411  corosync_api->timer_delete(last_man_standing_timer);
2412  last_man_standing_timer_set = 0;
2413  }
2414  corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000,
2415  NULL, votequorum_last_man_standing_timer_fn,
2416  &last_man_standing_timer);
2417  last_man_standing_timer_set = 1;
2418  }
2419  }
2420
 /* Keep the old membership as "previous", then install the new one */
2421  memcpy(previous_quorum_members, quorum_members, sizeof(unsigned int) * quorum_members_entries);
2422  previous_quorum_members_entries = quorum_members_entries;
2423
2424  memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries);
2425  quorum_members_entries = member_list_entries;
2426  memcpy(&quorum_ringid, ring_id, sizeof(*ring_id));
2427
 /*
  * NOTE(review): line 2428 is missing here. The closing brace after the
  * log_printf below suggests this section is conditional - presumably on
  * a quorum device being registered. Confirm against the full source.
  */
2429  /*
2430  * Reset poll timer. Sync waiting is interrupted on valid qdevice poll or after timeout
2431  */
2432  if (qdevice_timer_set) {
2433  corosync_api->timer_delete(qdevice_timer);
2434  }
2435  corosync_api->timer_add_duration((unsigned long long)qdevice_sync_timeout*1000000, qdevice,
2436  qdevice_timer_fn, &qdevice_timer);
2437  qdevice_timer_set = 1;
2438  sync_wait_for_poll_or_timeout = 1;
2439
2440  log_printf(LOGSYS_LEVEL_INFO, "waiting for quorum device %s poll (but maximum for %u ms)",
2441  qdevice_name, qdevice_sync_timeout);
2442  }
2443
2444  LEAVE();
2445 }
2446 
2447 static int votequorum_sync_process (void)
2448 {
2449  if (!sync_nodeinfo_sent) {
2450  votequorum_exec_send_nodeinfo(us->node_id);
2451  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2452  if (strlen(qdevice_name)) {
2453  votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2454  qdevice_name);
2455  }
2456  votequorum_exec_send_nodelist_notification(NULL, 0LL);
2457  sync_nodeinfo_sent = 1;
2458  }
2459 
2460  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) {
2461  /*
2462  * Waiting for qdevice to poll with new ringid or timeout
2463  */
2464 
2465  return (-1);
2466  }
2467 
2468  return 0;
2469 }
2470 
2471 static void votequorum_sync_activate (void)
2472 {
2473  recalculate_quorum(0, 0);
2474  quorum_callback(quorum_members, quorum_members_entries,
2475  cluster_is_quorate, &quorum_ringid);
2476  votequorum_exec_send_quorum_notification(NULL, 0L);
2477 
2478  sync_in_progress = 0;
2479 }
2480 
/*
 * Sync abort hook. Intentionally empty: no state is touched here.
 */
static void votequorum_sync_abort (void)
{
}
2485 
2487  quorum_set_quorate_fn_t q_set_quorate_fn)
2488 {
 /*
  * NOTE(review): the first line of this definition (source line 2486) is
  * missing from this listing; only the last parameter is visible above.
  *
  * Stores `api` and `q_set_quorate_fn` in the file-level corosync_api and
  * quorum_callback, then links and initialises the votequorum service.
  * Returns NULL on success, or an error string when the callback is NULL
  * or corosync_service_link_and_init() fails.
  *
  * NOTE(review): both early returns below leave without calling LEAVE(),
  * unlike the success path - confirm whether that is intended.
  */
2489  char *error;
2490
2491  ENTER();
2492
2493  if (q_set_quorate_fn == NULL) {
2494  return ((char *)"Quorate function not set");
2495  }
2496
2497  corosync_api = api;
2498  quorum_callback = q_set_quorate_fn;
2499
2500  error = corosync_service_link_and_init(corosync_api,
2501  &votequorum_service[0]);
2502  if (error) {
2503  return (error);
2504  }
2505
2506  LEAVE();
2507
2508  return (NULL);
2509 }
2510 
2511 /*
2512  * Library Handler init/fini
2513  */
2514 
2515 static int quorum_lib_init_fn (void *conn)
2516 {
2517  struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2518 
2519  ENTER();
2520 
2521  qb_list_init (&pd->list);
2522  pd->conn = conn;
2523 
2524  LEAVE();
2525  return (0);
2526 }
2527 
2528 static int quorum_lib_exit_fn (void *conn)
2529 {
2530  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2531 
2532  ENTER();
2533 
2534  if (quorum_pd->tracking_enabled) {
2535  qb_list_del (&quorum_pd->list);
2536  qb_list_init (&quorum_pd->list);
2537  }
2538 
2539  LEAVE();
2540 
2541  return (0);
2542 }
2543 
2544 /*
2545  * library internal functions
2546  */
2547 
/*
 * Timer callback registered (elsewhere in this file) with qdevice_timer.
 *
 * Does nothing unless the local node has NODE_FLAGS_QDEVICE_ALIVE set and
 * the timer is marked as set. Otherwise it logs that contact with the
 * quorum device was lost, broadcasts our nodeinfo and clears both
 * qdevice_timer_set and sync_wait_for_poll_or_timeout.
 *
 * arg: timer user data; not referenced in the visible body.
 *
 * NOTE(review): source lines 2558-2559 are missing from this listing -
 * presumably they clear qdevice flags on `us` before the nodeinfo is sent.
 * Confirm against the full source.
 */
2548 static void qdevice_timer_fn(void *arg)
2549 {
2550  ENTER();
2551
2552  if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) ||
2553  (!qdevice_timer_set)) {
2554  LEAVE();
2555  return;
2556  }
2557
2560  log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name);
2561  votequorum_exec_send_nodeinfo(us->node_id);
2562
2563  qdevice_timer_set = 0;
2564  sync_wait_for_poll_or_timeout = 0;
2565
2566  LEAVE();
2567 }
2568 
2569 /*
2570  * Library Handler Functions
2571  */
2572 
/*
 * Library handler: report quorum information for one node.
 *
 * Looks the requested node up; when found, sums the votes and tracks the
 * highest expected_votes over all nodes in NODESTATE_MEMBER, adds the
 * qdevice votes if the node has NODE_FLAGS_QDEVICE_CAST_VOTE, and copies
 * the qdevice name into the response. When the node is unknown the
 * response error is CS_ERR_NOT_EXIST.
 *
 * NOTE(review): many source lines are missing from this listing (e.g. the
 * request/response declarations, the `nodeid` variable, the bodies of the
 * state switch and of the flag checks, and the response send). The empty
 * `if` bodies below are an artefact of that, not of the original code.
 */
2573 static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message)
2574 {
2577  struct cluster_node *node;
2578  unsigned int highest_expected = 0;
2579  unsigned int total_votes = 0;
2580  cs_error_t error = CS_OK;
2582
2583  ENTER();
2584
2585  log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node " CS_PRI_NODE_ID, conn, req_lib_votequorum_getinfo->nodeid);
2586
 /* NOTE(review): the condition guarding this fallback to our own node id (line 2587) is missing */
2588  nodeid = us->node_id;
2589  }
2590
2591  node = find_node_by_nodeid(nodeid);
2592  if (node) {
2593  struct cluster_node *iternode;
2594  struct qb_list_head *nodelist;
2595
 /* Only nodes in MEMBER state contribute to the totals */
2596  qb_list_for_each(nodelist, &cluster_members_list) {
2597  iternode = qb_list_entry(nodelist, struct cluster_node, list);
2598
2599  if (iternode->state == NODESTATE_MEMBER) {
2600  highest_expected =
2601  max(highest_expected, iternode->expected_votes);
2602  total_votes += iternode->votes;
2603  }
2604  }
2605
2606  if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2607  total_votes += qdevice->votes;
2608  }
2609
2610  switch(node->state) {
2611  case NODESTATE_MEMBER:
2613  break;
2614  case NODESTATE_DEAD:
2616  break;
2617  case NODESTATE_LEAVING:
2619  break;
2620  default:
2622  break;
2623  }
2627  res_lib_votequorum_getinfo.highest_expected = highest_expected;
2628
2633
2634  if (two_node) {
2636  }
2637  if (cluster_is_quorate) {
2639  }
2640  if (wait_for_all) {
2642  }
2643  if (last_man_standing) {
2645  }
2646  if (auto_tie_breaker != ATB_NONE) {
2648  }
2649  if (allow_downscale) {
2651  }
2652
2654  strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name);
2656
2657  if (node->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2659  }
2660  if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) {
2662  }
2663  if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2665  }
2666  if (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) {
2668  }
2669  } else {
2670  error = CS_ERR_NOT_EXIST;
2671  }
2672
2675  res_lib_votequorum_getinfo.header.error = error;
2677  log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error);
2678
2679  LEAVE();
2680 }
2681 
/*
 * Library handler: change the cluster's expected votes.
 *
 * The requested value is validated first: calculate_quorum() is run with
 * allow_downscale temporarily forced to 0, and the request is rejected with
 * CS_ERR_INVALID_PARAM when it is below the current total votes or when it
 * would make a quorate cluster inquorate. A valid value is applied locally
 * and a reconfigure message is sent; CS_ERR_NO_RESOURCES is reported when
 * that send fails.
 *
 * NOTE(review): several source lines are missing from this listing (request
 * and response declarations, the tail of the send condition at 2713, and
 * the response send around 2718-2721).
 */
2682 static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message)
2683 {
2686  cs_error_t error = CS_OK;
2687  unsigned int newquorum;
2688  unsigned int total_votes;
2689  uint8_t allow_downscale_status = 0;
2690
2691  ENTER();
2692
 /* Save allow_downscale and disable it for the validation call only */
2693  allow_downscale_status = allow_downscale;
2694  allow_downscale = 0;
2695
2696  /*
2697  * Validate new expected votes
2698  */
2699  newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes);
2700  allow_downscale = allow_downscale_status;
2701  /*
2702  * Setting expected_votes < total_votes doesn't make sense.
2703  * For quorate cluster prevent cluster to become unquorate.
2704  */
2705  if (req_lib_votequorum_setexpected->expected_votes < total_votes ||
2706  (cluster_is_quorate && (newquorum > total_votes))) {
2707  error = CS_ERR_INVALID_PARAM;
2708  goto error_exit;
2709  }
2710  update_node_expected_votes(req_lib_votequorum_setexpected->expected_votes);
2711
2712  if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id,
2714  error = CS_ERR_NO_RESOURCES;
2715  }
2716
2717 error_exit:
2720  res_lib_votequorum_status.header.error = error;
2722
2723  LEAVE();
2724 }
2725 
/*
 * Library handler: change the number of votes of one node.
 *
 * Reports CS_ERR_NAME_NOT_FOUND when the node is unknown. Otherwise the
 * node's current votes are saved, quorum is recalculated, and if the new
 * quorum falls outside [total_votes / 2, total_votes] the saved votes are
 * restored and CS_ERR_INVALID_PARAM is reported. A valid change is sent to
 * the cluster as a reconfigure message; CS_ERR_NO_RESOURCES is reported
 * when that send fails.
 *
 * NOTE(review): several source lines are missing from this listing,
 * including the assignment of `nodeid` (2739), the line after saved_votes
 * (2750, presumably storing the requested votes on the node), the tail of
 * the send condition (2762) and the response send.
 */
2726 static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message)
2727 {
2730  struct cluster_node *node;
2731  unsigned int newquorum;
2732  unsigned int total_votes;
2733  unsigned int saved_votes;
2734  cs_error_t error = CS_OK;
2735  unsigned int nodeid;
2736
2737  ENTER();
2738
2740  node = find_node_by_nodeid(nodeid);
2741  if (!node) {
2742  error = CS_ERR_NAME_NOT_FOUND;
2743  goto error_exit;
2744  }
2745
2746  /*
2747  * Check votes is valid
2748  */
2749  saved_votes = node->votes;
2751
2752  newquorum = calculate_quorum(1, 0, &total_votes);
2753
 /* Out-of-range quorum: roll the node's votes back and reject */
2754  if (newquorum < total_votes / 2 ||
2755  newquorum > total_votes) {
2756  node->votes = saved_votes;
2757  error = CS_ERR_INVALID_PARAM;
2758  goto error_exit;
2759  }
2760
2761  if (votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid,
2763  error = CS_ERR_NO_RESOURCES;
2764  }
2765
2766 error_exit:
2769  res_lib_votequorum_status.header.error = error;
2771
2772  LEAVE();
2773 }
2774 
/*
 * Library handler: start quorum tracking for a connection.
 *
 * Optionally sends the current node list and quorum state to the caller
 * straight away, then - unless tracking is already enabled for this
 * connection, which yields CS_ERR_EXIST - adds the connection's private
 * data to trackers_list.
 *
 * NOTE(review): several source lines are missing from this listing: the
 * request/response declarations, the condition guarding the initial status
 * (2789-2790), the condition and field assignments before qb_list_add
 * (2804-2809) and the response send.
 */
2775 static void message_handler_req_lib_votequorum_trackstart (void *conn,
2776  const void *message)
2777 {
2780  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2781  cs_error_t error = CS_OK;
2782
2783  ENTER();
2784
2785  /*
2786  * If an immediate listing of the current cluster membership
2787  * is requested, generate membership list
2788  */
2791  log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn);
2792  votequorum_exec_send_nodelist_notification(conn, req_lib_votequorum_trackstart->context);
2793  votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context);
2794  }
2795
 /* The initial status above is sent before this duplicate-tracking check */
2796  if (quorum_pd->tracking_enabled) {
2797  error = CS_ERR_EXIST;
2798  goto response_send;
2799  }
2800
2801  /*
2802  * Record requests for tracking
2803  */
2806
2810
2811  qb_list_add (&quorum_pd->list, &trackers_list);
2812  }
2813
2814 response_send:
2817  res_lib_votequorum_status.header.error = error;
2819
2820  LEAVE();
2821 }
2822 
/*
 * Library handler: stop quorum tracking for a connection.
 *
 * When tracking is enabled the connection's private data is unlinked from
 * its list and the list head re-initialised; otherwise CS_ERR_NOT_EXIST is
 * reported.
 *
 * NOTE(review): source lines 2826, 2834 and the response send (2841-2844)
 * are missing from this listing; 2834 presumably clears tracking_enabled.
 * Also note `error` is a plain int here while the sibling handlers use
 * cs_error_t.
 */
2823 static void message_handler_req_lib_votequorum_trackstop (void *conn,
2824  const void *message)
2825 {
2827  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2828  int error = CS_OK;
2829
2830  ENTER();
2831
2832  if (quorum_pd->tracking_enabled) {
2833  error = CS_OK;
2835  qb_list_del (&quorum_pd->list);
2836  qb_list_init (&quorum_pd->list);
2837  } else {
2838  error = CS_ERR_NOT_EXIST;
2839  }
2840
2843  res_lib_votequorum_status.header.error = error;
2845
2846  LEAVE();
2847 }
2848 
/*
 * Library handler: register a quorum device.
 *
 * Visible outcomes:
 *  - CS_ERR_ACCESS when qdevice_can_operate is false;
 *  - CS_ERR_EXIST when a device with a different name tries to re-register;
 *  - CS_ERR_TRY_AGAIN when another registration is already in progress
 *    (qdevice_reg_conn set) or the registration request cannot be sent;
 *  - on a successful send the function returns without answering here,
 *    leaving qdevice_reg_conn pointing at this connection (the reply is
 *    presumably sent once the cluster message is processed - confirm).
 *
 * NOTE(review): several source lines are missing from this listing (the
 * request/response declarations, the outer "already registered" condition
 * and name comparison at 2864-2865, log_printf openers, part of the send
 * call and the response send).
 */
2849 static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
2850  const void *message)
2851 {
2854  cs_error_t error = CS_OK;
2855
2856  ENTER();
2857
2858  if (!qdevice_can_operate) {
2859  log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information");
2860  error = CS_ERR_ACCESS;
2861  goto out;
2862  }
2863
2866  qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2867  goto out;
2868  } else {
2870  "A new qdevice with different name (new: %s old: %s) is trying to re-register!",
2871  req_lib_votequorum_qdevice_register->name, qdevice_name);
2872  error = CS_ERR_EXIST;
2873  goto out;
2874  }
2875  } else {
2876  if (qdevice_reg_conn != NULL) {
2878  "Registration request already in progress");
2879  error = CS_ERR_TRY_AGAIN;
2880  goto out;
2881  }
 /* Remember the requesting connection; reset below if the send fails */
2882  qdevice_reg_conn = conn;
2883  if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2886  "Unable to send qdevice registration request to cluster");
2887  error = CS_ERR_TRY_AGAIN;
2888  qdevice_reg_conn = NULL;
2889  } else {
2890  LEAVE();
2891  return;
2892  }
2893  }
2894
2895 out:
2896
2899  res_lib_votequorum_status.header.error = error;
2901
2902  LEAVE();
2903 }
2904 
/*
 * Library handler: unregister the quorum device.
 *
 * On the success path any pending qdevice timer is deleted (also clearing
 * sync_wait_for_poll_or_timeout), our nodeinfo is broadcast and an
 * unregister message is sent to the cluster. CS_ERR_INVALID_PARAM and
 * CS_ERR_NOT_EXIST are the visible error results.
 *
 * NOTE(review): several source lines are missing from this listing: the
 * request/response declarations, the conditions guarding both error paths
 * (2914-2915), lines 2924-2927 (presumably clearing qdevice flags on `us`),
 * the tail of the unregister send and the response send.
 */
2905 static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
2906  const void *message)
2907 {
2910  cs_error_t error = CS_OK;
2911
2912  ENTER();
2913
2916  error = CS_ERR_INVALID_PARAM;
2917  goto out;
2918  }
2919  if (qdevice_timer_set) {
2920  corosync_api->timer_delete(qdevice_timer);
2921  qdevice_timer_set = 0;
2922  sync_wait_for_poll_or_timeout = 0;
2923  }
2928  votequorum_exec_send_nodeinfo(us->node_id);
2929  votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER,
2931  } else {
2932  error = CS_ERR_NOT_EXIST;
2933  }
2934
2935 out:
2938  res_lib_votequorum_status.header.error = error;
2940
2941  LEAVE();
2942 }
2943 
/*
 * Library handler: rename the quorum device.
 *
 * On the success path a qdevice reconfigure message carrying the old name
 * (and, presumably, the new name - the argument line is missing) is sent to
 * the cluster. CS_ERR_INVALID_PARAM and CS_ERR_NOT_EXIST are the visible
 * error results.
 *
 * NOTE(review): the request/response declarations, both guarding
 * conditions (2953-2954), the tail of the send call (2959) and the
 * response send are missing from this listing.
 */
2944 static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
2945  const void *message)
2946 {
2949  cs_error_t error = CS_OK;
2950
2951  ENTER();
2952
2955  error = CS_ERR_INVALID_PARAM;
2956  goto out;
2957  }
2958  votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname,
2960  } else {
2961  error = CS_ERR_NOT_EXIST;
2962  }
2963
2964 out:
2967  res_lib_votequorum_status.header.error = error;
2969
2970  LEAVE();
2971 }
2972 
/*
 * Library handler: a poll ("heartbeat") from the quorum device.
 *
 * Visible behaviour:
 *  - CS_ERR_ACCESS when qdevice_can_operate is false;
 *  - CS_ERR_MESSAGE_ERROR when the ring id in the request differs from the
 *    ring id recorded at the last sync (the poll is ignored);
 *  - CS_ERR_INVALID_PARAM / CS_ERR_NOT_EXIST on the other guarded paths;
 *  - otherwise the pending qdevice timer is deleted, our nodeinfo is
 *    re-broadcast if our flags changed, the timer is re-armed with
 *    qdevice_timeout and sync_wait_for_poll_or_timeout is cleared.
 *
 * NOTE(review): several source lines are missing from this listing: the
 * request/response declarations, the outer registration condition (2988),
 * the name check (2998), the flag updates between 3010 and 3016, and the
 * response send.
 */
2973 static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
2974  const void *message)
2975 {
2978  cs_error_t error = CS_OK;
2979  uint32_t oldflags;
2980
2981  ENTER();
2982
2983  if (!qdevice_can_operate) {
2984  error = CS_ERR_ACCESS;
2985  goto out;
2986  }
2987
 /* Polls carrying a ring id other than the last synced one are rejected */
2989  if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.nodeid &&
2990  req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.seq)) {
2991  log_printf(LOGSYS_LEVEL_DEBUG, "Received poll ring id (" CS_PRI_RING_ID ") != last sync "
2992  "ring id (" CS_PRI_RING_ID "). Ignoring poll call.",
2994  quorum_ringid.nodeid, quorum_ringid.seq);
2995  error = CS_ERR_MESSAGE_ERROR;
2996  goto out;
2997  }
2999  error = CS_ERR_INVALID_PARAM;
3000  goto out;
3001  }
3002
3003  if (qdevice_timer_set) {
3004  corosync_api->timer_delete(qdevice_timer);
3005  qdevice_timer_set = 0;
3006  }
3007
 /* Snapshot the flags so a change can be detected after the (missing) updates */
3008  oldflags = us->flags;
3009
3011
3014  } else {
3016  }
3017
3018  if (us->flags != oldflags) {
3019  votequorum_exec_send_nodeinfo(us->node_id);
3020  }
3021
3022  corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice,
3023  qdevice_timer_fn, &qdevice_timer);
3024  qdevice_timer_set = 1;
3025  sync_wait_for_poll_or_timeout = 0;
3026  } else {
3027  error = CS_ERR_NOT_EXIST;
3028  }
3029
3030 out:
3033  res_lib_votequorum_status.header.error = error;
3035
3036  LEAVE();
3037 }
3038 
/*
 * Library handler: set or clear the quorum device "master wins" behaviour.
 *
 * Visible behaviour: CS_ERR_ACCESS when qdevice_can_operate is false;
 * CS_ERR_INVALID_PARAM / CS_ERR_NOT_EXIST on the other guarded paths;
 * otherwise our nodeinfo is re-broadcast if our flags changed and
 * update_qdevice_master_wins() is called with the request's `allow` value.
 *
 * NOTE(review): the request/response declarations, the guarding conditions
 * (3054-3055), the flag updates (3060-3063) and the response send are
 * missing from this listing. Note also that `oldflags` is captured in the
 * initialiser, before ENTER().
 */
3039 static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
3040  const void *message)
3041 {
3044  cs_error_t error = CS_OK;
3045  uint32_t oldflags = us->flags;
3046
3047  ENTER();
3048
3049  if (!qdevice_can_operate) {
3050  error = CS_ERR_ACCESS;
3051  goto out;
3052  }
3053
3056  error = CS_ERR_INVALID_PARAM;
3057  goto out;
3058  }
3059
3062  } else {
3064  }
3065
3066  if (us->flags != oldflags) {
3067  votequorum_exec_send_nodeinfo(us->node_id);
3068  }
3069
3070  update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow);
3071  } else {
3072  error = CS_ERR_NOT_EXIST;
3073  }
3074
3075 out:
3078  res_lib_votequorum_status.header.error = error;
3080
3081  LEAVE();
3082 }
#define SERVICE_ID_MAKE(a, b)
Definition: coroapi.h:458
@ CS_LIB_ALLOW_INQUORATE
Definition: coroapi.h:164
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
Definition: coroapi.h:74
#define TOTEM_AGREED
Definition: coroapi.h:102
#define COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
Definition: coroapi.h:157
#define COROSYNC_LIB_FLOW_CONTROL_REQUIRED
Definition: coroapi.h:156
#define PROCESSOR_COUNT_MAX
Definition: coroapi.h:96
@ VOTEQUORUM_SERVICE
Definition: corodefs.h:49
#define CS_TRACK_CURRENT
Definition: corotypes.h:90
#define CS_FALSE
Definition: corotypes.h:53
#define CS_PRI_NODE_ID
Definition: corotypes.h:59
#define CS_TRACK_CHANGES
Definition: corotypes.h:91
#define CS_TRUE
Definition: corotypes.h:54
#define CS_TRACK_CHANGES_ONLY
Definition: corotypes.h:92
cs_error_t
The cs_error_t enum.
Definition: corotypes.h:97
@ CS_ERR_MESSAGE_ERROR
Definition: corotypes.h:119
@ CS_ERR_NO_RESOURCES
Definition: corotypes.h:115
@ CS_ERR_ACCESS
Definition: corotypes.h:108
@ CS_ERR_TRY_AGAIN
Definition: corotypes.h:103
@ CS_OK
Definition: corotypes.h:98
@ CS_ERR_INVALID_PARAM
Definition: corotypes.h:104
@ CS_ERR_NAME_NOT_FOUND
Definition: corotypes.h:114
@ CS_ERR_NOT_EXIST
Definition: corotypes.h:109
@ CS_ERR_EXIST
Definition: corotypes.h:111
#define CS_PRI_RING_ID
Definition: corotypes.h:61
void(* quorum_set_quorate_fn_t)(const unsigned int *view_list, size_t view_list_entries, int quorate, struct memb_ring_id *)
Definition: exec/quorum.h:42
#define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA
#define NODE_FLAGS_QUORATE
#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES
#define DEFAULT_LMS_WIN
#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES
uint32_t operation
#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER
uint8_t param
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define NODE_FLAGS_FIRST
uint32_t nodeid
#define NODE_FLAGS_QDEVICE_REGISTERED
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE
uint32_t flags
nodestate_t
@ NODESTATE_LEAVING
@ NODESTATE_DEAD
@ NODESTATE_MEMBER
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
uint32_t votes
#define NODE_FLAGS_QDEVICE_CAST_VOTE
typedef __attribute__
#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE
uint32_t expected_votes
#define NODE_FLAGS_QDEVICE_MASTER_WINS
#define VOTEQUORUM_READCONFIG_STARTUP
#define NODE_FLAGS_LEAVING
#define NODE_FLAGS_QDEVICE_ALIVE
#define NODE_FLAGS_WFASTATUS
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG
char * votequorum_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t q_set_quorate_fn)
@ ATB_LIST
@ ATB_LOWEST
@ ATB_HIGHEST
@ ATB_NONE
LOGSYS_DECLARE_SUBSYS("VOTEQ")
#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER
#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO
uint32_t value
struct corosync_service_engine * votequorum_get_service_engine_ver0(void)
#define VOTEQUORUM_READCONFIG_RUNTIME
#define max(a, b)
cs_error_t icmap_get_uint8(const char *key_name, uint8_t *u8)
Definition: icmap.c:868
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
Definition: icmap.c:892
cs_error_t icmap_set_uint8(const char *key_name, uint8_t value)
Definition: icmap.c:573
cs_error_t icmap_set_ro_access(const char *key_name, int prefix, int ro_access)
Set read-only access for the given key (key_name), or for the whole prefix if prefix is set.
Definition: icmap.c:1225
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
Definition: icmap.c:1159
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem....
Definition: icmap.h:85
icmap_iter_t icmap_iter_init(const char *prefix)
Initialize iterator with given prefix.
Definition: icmap.c:1089
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Return next item in iterator iter.
Definition: icmap.c:1095
qb_map_iter_t * icmap_iter_t
Iterator type.
Definition: icmap.h:123
void icmap_iter_finalize(icmap_iter_t iter)
Finalize iterator.
Definition: icmap.c:1116
cs_error_t icmap_get_int32(const char *key_name, int32_t *i32)
Definition: icmap.c:886
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
Definition: icmap.h:48
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:597
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
Definition: icmap.c:856
#define VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT
#define VOTEQUORUM_INFO_AUTO_TIE_BREAKER
#define VOTEQUORUM_INFO_QDEVICE_ALIVE
#define VOTEQUORUM_NODESTATE_MEMBER
#define VOTEQUORUM_NODESTATE_DEAD
#define VOTEQUORUM_INFO_ALLOW_DOWNSCALE
@ MESSAGE_RES_VOTEQUORUM_NODELIST_NOTIFICATION
@ MESSAGE_RES_VOTEQUORUM_EXPECTEDVOTES_NOTIFICATION
@ MESSAGE_RES_VOTEQUORUM_STATUS
@ MESSAGE_RES_VOTEQUORUM_QUORUM_NOTIFICATION
@ MESSAGE_RES_VOTEQUORUM_GETINFO
#define VOTEQUORUM_INFO_QDEVICE_MASTER_WINS
#define VOTEQUORUM_NODESTATE_LEAVING
#define VOTEQUORUM_INFO_TWONODE
#define VOTEQUORUM_INFO_WAIT_FOR_ALL
#define VOTEQUORUM_INFO_QUORATE
#define VOTEQUORUM_INFO_QDEVICE_REGISTERED
#define VOTEQUORUM_INFO_LAST_MAN_STANDING
#define VOTEQUORUM_QDEVICE_MAX_NAME_LEN
#define VOTEQUORUM_QDEVICE_NODEID
#define VOTEQUORUM_INFO_QDEVICE_CAST_VOTE
#define VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT
#define LOGSYS_LEVEL_ERROR
Definition: logsys.h:72
#define LEAVE
Definition: logsys.h:325
#define log_printf(level, format, args...)
Definition: logsys.h:323
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:75
#define LOGSYS_LEVEL_CRIT
Definition: logsys.h:71
#define LOGSYS_LEVEL_NOTICE
Definition: logsys.h:74
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:73
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:76
#define ENTER
Definition: logsys.h:324
void * user_data
Definition: sam.c:127
uint32_t quorate
Definition: sam.c:134
char * corosync_service_link_and_init(struct corosync_api_v1 *corosync_api, struct default_service *service)
Link and initialize a service.
Definition: service.c:117
nodestate_t state
uint32_t expected_votes
struct qb_list_head list
The corosync_api_v1 struct.
Definition: coroapi.h:225
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:229
int(* totem_mcast)(const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee)
Definition: coroapi.h:279
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:241
unsigned int(* totem_nodeid_get)(void)
Definition: coroapi.h:275
int(* ipc_dispatch_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:263
int(* ipc_response_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:258
void(* error_memory_failure)(void) __attribute__((noreturn))
Definition: coroapi.h:422
void *(* ipc_private_data_get)(void *conn)
Definition: coroapi.h:256
The corosync_exec_handler struct.
Definition: coroapi.h:475
void(* exec_handler_fn)(const void *msg, unsigned int nodeid)
Definition: coroapi.h:476
The corosync_lib_handler struct.
Definition: coroapi.h:467
void(* lib_handler_fn)(void *conn, const void *msg)
Definition: coroapi.h:468
The corosync_service_engine struct.
Definition: coroapi.h:490
const char * name
Definition: coroapi.h:491
const char * name
Definition: service.h:43
Structure passed as new_value and old_value in change callback.
Definition: icmap.h:91
The memb_ring_id struct.
Definition: coroapi.h:122
unsigned long long seq
Definition: coroapi.h:124
unsigned int nodeid
Definition: coroapi.h:123
unsigned char track_flags
uint64_t tracking_context
int tracking_enabled
struct qb_list_head list
struct qb_ipc_request_header header __attribute__((aligned(8)))
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
struct qb_ipc_request_header header __attribute__((aligned(8)))
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
struct qb_ipc_request_header header __attribute__((aligned(8)))
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
struct qb_ipc_request_header header __attribute__((aligned(8)))
The req_lib_votequorum_getinfo struct.
The req_lib_votequorum_qdevice_master_wins struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_qdevice_poll struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_qdevice_register struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_qdevice_unregister struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_qdevice_update struct.
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_setexpected struct.
The req_lib_votequorum_setvotes struct.
The req_lib_votequorum_trackstart struct.
The res_lib_votequorum_expectedvotes_notification struct.
The res_lib_votequorum_getinfo struct.
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The res_lib_votequorum_quorum_notification struct.
The res_lib_votequorum_status struct.
The votequorum_node struct.
#define swab32(x)
The swab32 macro.
Definition: swab.h:51
struct memb_ring_id ring_id
Definition: totemsrp.c:4
struct totem_message_header header
Definition: totemsrp.c:0
const char * get_state_dir(void)
Definition: util.c:172