corosync  3.1.0
totemsrp.c
1 /*
2  * Copyright (c) 2003-2006 MontaVista Software, Inc.
3  * Copyright (c) 2006-2018 Red Hat, Inc.
4  *
5  * All rights reserved.
6  *
7  * Author: Steven Dake (sdake@redhat.com)
8  *
9  * This software licensed under BSD license, the text of which follows:
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are met:
13  *
14  * - Redistributions of source code must retain the above copyright notice,
15  * this list of conditions and the following disclaimer.
16  * - Redistributions in binary form must reproduce the above copyright notice,
17  * this list of conditions and the following disclaimer in the documentation
18  * and/or other materials provided with the distribution.
19  * - Neither the name of the MontaVista Software, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived from this
21  * software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33  * THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*
37  * The first version of this code was based upon Yair Amir's PhD thesis:
38  * http://www.cs.jhu.edu/~yairamir/phd.ps (ch. 4, 5).
39  *
40  * The current version of totemsrp implements the Totem protocol specified in:
41  * http://citeseer.ist.psu.edu/amir95totem.html
42  *
43  * The deviations from the above published protocols are:
44  * - token hold mode where the token doesn't rotate on an unused ring - reduces CPU
45  * usage on a 1.6 GHz Xeon from 35% to less than 0.1%, as measured by top
46  */
47 
48 #include <config.h>
49 
50 #include <assert.h>
51 #ifdef HAVE_ALLOCA_H
52 #include <alloca.h>
53 #endif
54 #include <sys/mman.h>
55 #include <sys/types.h>
56 #include <sys/stat.h>
57 #include <sys/socket.h>
58 #include <netdb.h>
59 #include <sys/un.h>
60 #include <sys/ioctl.h>
61 #include <sys/param.h>
62 #include <netinet/in.h>
63 #include <arpa/inet.h>
64 #include <unistd.h>
65 #include <fcntl.h>
66 #include <stdlib.h>
67 #include <stdio.h>
68 #include <errno.h>
69 #include <sched.h>
70 #include <time.h>
71 #include <sys/time.h>
72 #include <sys/poll.h>
73 #include <sys/uio.h>
74 #include <limits.h>
75 
76 #include <qb/qblist.h>
77 #include <qb/qbdefs.h>
78 #include <qb/qbutil.h>
79 #include <qb/qbloop.h>
80 
81 #include <corosync/swab.h>
82 #include <corosync/sq.h>
83 
84 #define LOGSYS_UTILS_ONLY 1
85 #include <corosync/logsys.h>
86 
87 #include "totemsrp.h"
88 #include "totemnet.h"
89 
90 #include "icmap.h"
91 #include "totemconfig.h"
92 
93 #include "cs_queue.h"
94 
95 #define LOCALHOST_IP inet_addr("127.0.0.1")
96 #define QUEUE_RTR_ITEMS_SIZE_MAX 16384 /* allow 16384 retransmit items */
97 #define RETRANS_MESSAGE_QUEUE_SIZE_MAX 16384 /* allow 16384 messages to be queued */
98 #define RECEIVED_MESSAGE_QUEUE_SIZE_MAX 500 /* allow 500 messages to be queued */
99 #define MAXIOVS 5
100 #define RETRANSMIT_ENTRIES_MAX 30
101 #define TOKEN_SIZE_MAX 64000 /* bytes */
102 #define LEAVE_DUMMY_NODEID 0
103 
104 /*
105  * SRP address.
106  */
107 struct srp_addr {
108  unsigned int nodeid;
109 };
110 
111 /*
112  * Rollover handling:
113  * SEQNO_START_MSG is the starting sequence number after a new configuration
114  * This should remain zero, unless testing overflow in which case
115  * 0x7ffff000 and 0xfffff000 are good starting values.
116  *
117  * SEQNO_START_TOKEN is the starting sequence number after a new configuration
118  * for a token. This should remain zero, unless testing overflow in which
119  * case 0x7fffff00 or 0xffffff00 are good starting values.
120  */
121 #define SEQNO_START_MSG 0x0
122 #define SEQNO_START_TOKEN 0x0
123 
124 /*
125  * These can be used to test different rollover points
126  * #define SEQNO_START_MSG 0xfffffe00
127  * #define SEQNO_START_TOKEN 0xfffffe00
128  */
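/*
 * Editorial sketch, not part of the original totemsrp.c: sequence numbers
 * wrap around, so this file compares them with wrap-aware helpers such as
 * sq_lt_compare() from <corosync/sq.h> rather than a plain '<'.  The
 * function below (the name seqno_lt_example is ours) only illustrates the
 * usual wrap-safe idiom that makes the non-zero SEQNO_START_* test values
 * above usable; it is not the exact implementation corosync uses.
 */
static inline int seqno_lt_example (unsigned int a, unsigned int b)
{
	/* true when a precedes b, even across a 32-bit wrap of the counter */
	return ((int)(a - b)) < 0;
}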
129 
130 /*
131  * These can be used to test the error recovery algorithms
132  * #define TEST_DROP_ORF_TOKEN_PERCENTAGE 30
133  * #define TEST_DROP_COMMIT_TOKEN_PERCENTAGE 30
134  * #define TEST_DROP_MCAST_PERCENTAGE 50
135  * #define TEST_RECOVERY_MSG_COUNT 300
136  */
137 
138 /*
139  * we compare incoming messages to determine if their endian is
140  * different - if so convert them
141  *
142  * do not change
143  */
144 #define ENDIAN_LOCAL 0xff22
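/*
 * Editorial sketch, not part of the original totemsrp.c: each message
 * header carries an endian detector word that the sender fills with its
 * own ENDIAN_LOCAL.  A receiver whose ENDIAN_LOCAL differs from the value
 * that arrived knows the sender had the opposite byte order and runs the
 * per-type *_endian_convert() helpers defined later in this file.  The
 * function name below is ours and only illustrates that check.
 */
static inline int endian_conversion_needed_example (unsigned short detector_from_wire)
{
	return (detector_from_wire != ENDIAN_LOCAL);
}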
145 
146 enum message_type {
147  MESSAGE_TYPE_ORF_TOKEN = 0, /* Ordering, Reliability, Flow (ORF) control Token */
148  MESSAGE_TYPE_MCAST = 1, /* ring ordered multicast message */
149  MESSAGE_TYPE_MEMB_MERGE_DETECT = 2, /* merge rings if there are available rings */
150  MESSAGE_TYPE_MEMB_JOIN = 3, /* membership join message */
151  MESSAGE_TYPE_MEMB_COMMIT_TOKEN = 4, /* membership commit token */
152  MESSAGE_TYPE_TOKEN_HOLD_CANCEL = 5, /* cancel the holding of the token */
153 };
154 
155 enum encapsulation_type {
156  MESSAGE_ENCAPSULATED = 1,
157  MESSAGE_NOT_ENCAPSULATED = 2,
158 };
159 
160 /*
161  * New membership algorithm local variables
162  */
163 struct consensus_list_item {
164  struct srp_addr addr;
165  int set;
166 };
167 
168 
170  struct qb_list_head list;
171  int (*callback_fn) (enum totem_callback_token_type type, const void *);
173  int delete;
174  void *data;
175 };
176 
177 
179  int mcast;
180  int token;
181 };
182 
183 struct mcast {
185  struct srp_addr system_from;
186  unsigned int seq;
188  struct memb_ring_id ring_id;
189  unsigned int node_id;
191 } __attribute__((packed));
192 
193 
194 struct rtr_item {
195  struct memb_ring_id ring_id;
196  unsigned int seq;
197 }__attribute__((packed));
198 
199 
200 struct orf_token {
202  unsigned int seq;
203  unsigned int token_seq;
204  unsigned int aru;
205  unsigned int aru_addr;
206  struct memb_ring_id ring_id;
207  unsigned int backlog;
208  unsigned int fcc;
211  struct rtr_item rtr_list[0];
212 }__attribute__((packed));
213 
214 
215 struct memb_join {
217  struct srp_addr system_from;
218  unsigned int proc_list_entries;
219  unsigned int failed_list_entries;
220  unsigned long long ring_seq;
221  unsigned char end_of_memb_join[0];
222 /*
223  * These parts of the data structure are dynamic:
224  * struct srp_addr proc_list[];
225  * struct srp_addr failed_list[];
226  */
227 } __attribute__((packed));
228 
229 
232  struct srp_addr system_from;
233  struct memb_ring_id ring_id;
234 } __attribute__((packed));
235 
236 
239  struct memb_ring_id ring_id;
240 } __attribute__((packed));
241 
242 
244  struct memb_ring_id ring_id;
245  unsigned int aru;
246  unsigned int high_delivered;
247  unsigned int received_flg;
248 }__attribute__((packed));
249 
250 
253  unsigned int token_seq;
254  struct memb_ring_id ring_id;
255  unsigned int retrans_flg;
258  unsigned char end_of_commit_token[0];
259 /*
260  * These parts of the data structure are dynamic:
261  *
262  * struct srp_addr addr[PROCESSOR_COUNT_MAX];
263  * struct memb_commit_token_memb_entry memb_list[PROCESSOR_COUNT_MAX];
264  */
265 }__attribute__((packed));
266 
267 struct message_item {
268  struct mcast *mcast;
269  unsigned int msg_len;
270 };
271 
273  struct mcast *mcast;
274  unsigned int msg_len;
275 };
276 
282 };
283 
286 
288 
289  /*
290  * Flow control mcasts and remcasts on last and current orf_token
291  */
293 
295 
297 
299 
301 
303 
304  struct srp_addr my_id;
305 
307 
309 
311 
313 
315 
317 
319 
321 
323 
325 
327 
329 
331 
333 
335 
337 
339 
340  struct memb_ring_id my_ring_id;
341 
343 
345 
347 
348  unsigned int my_last_aru;
349 
351 
353 
354  unsigned int my_high_seq_received;
355 
356  unsigned int my_install_seq;
357 
359 
361 
363 
365 
367 
368  /*
369  * Queues used to order, deliver, and recover messages
370  */
372 
374 
376 
377  struct sq regular_sort_queue;
378 
379  struct sq recovery_sort_queue;
380 
381  /*
382  * Received up to and including
383  */
384  unsigned int my_aru;
385 
386  unsigned int my_high_delivered;
387 
388  struct qb_list_head token_callback_received_listhead;
389 
390  struct qb_list_head token_callback_sent_listhead;
391 
393 
395 
396  unsigned int my_token_seq;
397 
398  /*
399  * Timers
400  */
401  qb_loop_timer_handle timer_pause_timeout;
402 
403  qb_loop_timer_handle timer_orf_token_timeout;
404 
405  qb_loop_timer_handle timer_orf_token_warning;
406 
408 
410 
411  qb_loop_timer_handle timer_merge_detect_timeout;
412 
414 
416 
417  qb_loop_timer_handle memb_timer_state_commit_timeout;
418 
419  qb_loop_timer_handle timer_heartbeat_timeout;
420 
421  /*
422  * Function and data used to log messages
423  */
425 
427 
429 
431 
433 
435 
437 
439  int level,
440  int subsys,
441  const char *function,
442  const char *file,
443  int line,
444  const char *format, ...)__attribute__((format(printf, 6, 7)));
445 
446  enum memb_state memb_state;
447 
448 //TODO struct srp_addr next_memb;
449 
451 
453 
455  unsigned int nodeid,
456  const void *msg,
457  unsigned int msg_len,
458  int endian_conversion_required);
459 
461  enum totem_configuration_type configuration_type,
462  const unsigned int *member_list, size_t member_list_entries,
463  const unsigned int *left_list, size_t left_list_entries,
464  const unsigned int *joined_list, size_t joined_list_entries,
465  const struct memb_ring_id *ring_id);
466 
468 
470  int waiting_trans_ack);
471 
473  struct memb_ring_id *memb_ring_id,
474  unsigned int nodeid);
475 
477  const struct memb_ring_id *memb_ring_id,
478  unsigned int nodeid);
479 
481 
483 
484  unsigned long long token_ring_id_seq;
485 
486  unsigned int last_released;
487 
488  unsigned int set_aru;
489 
491 
493 
495 
496  unsigned int my_last_seq;
497 
498  struct timeval tv_old;
499 
501 
503 
504  unsigned int use_heartbeat;
505 
506  unsigned int my_trc;
507 
508  unsigned int my_pbl;
509 
510  unsigned int my_cbl;
511 
512  uint64_t pause_timestamp;
513 
515 
517 
519 
521 
523 
525 
526  int flushing;
527 
530  char commit_token_storage[40000];
531 };
532 
534  int count;
535  int (*handler_functions[6]) (
536  struct totemsrp_instance *instance,
537  const void *msg,
538  size_t msg_len,
539  int endian_conversion_needed);
540 };
541 
560 };
561 
562 const char* gather_state_from_desc [] = {
563  [TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT] = "consensus timeout",
564  [TOTEMSRP_GSFROM_GATHER_MISSING1] = "MISSING",
565  [TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE] = "The token was lost in the OPERATIONAL state.",
566  [TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED] = "The consensus timeout expired.",
567  [TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE] = "The token was lost in the COMMIT state.",
568  [TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE] = "The token was lost in the RECOVERY state.",
569  [TOTEMSRP_GSFROM_FAILED_TO_RECEIVE] = "failed to receive",
570  [TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE] = "foreign message in operational state",
571  [TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE] = "foreign message in gather state",
572  [TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE] = "merge during operational state",
573  [TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE] = "merge during gather state",
574  [TOTEMSRP_GSFROM_MERGE_DURING_JOIN] = "merge during join",
575  [TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE] = "join during operational state",
576  [TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE] = "join during commit state",
577  [TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY] = "join during recovery",
578  [TOTEMSRP_GSFROM_INTERFACE_CHANGE] = "interface change",
579 };
580 
581 /*
582  * forward decls
583  */
584 static int message_handler_orf_token (
585  struct totemsrp_instance *instance,
586  const void *msg,
587  size_t msg_len,
588  int endian_conversion_needed);
589 
590 static int message_handler_mcast (
591  struct totemsrp_instance *instance,
592  const void *msg,
593  size_t msg_len,
594  int endian_conversion_needed);
595 
596 static int message_handler_memb_merge_detect (
597  struct totemsrp_instance *instance,
598  const void *msg,
599  size_t msg_len,
600  int endian_conversion_needed);
601 
602 static int message_handler_memb_join (
603  struct totemsrp_instance *instance,
604  const void *msg,
605  size_t msg_len,
606  int endian_conversion_needed);
607 
608 static int message_handler_memb_commit_token (
609  struct totemsrp_instance *instance,
610  const void *msg,
611  size_t msg_len,
612  int endian_conversion_needed);
613 
614 static int message_handler_token_hold_cancel (
615  struct totemsrp_instance *instance,
616  const void *msg,
617  size_t msg_len,
618  int endian_conversion_needed);
619 
620 static void totemsrp_instance_initialize (struct totemsrp_instance *instance);
621 
622 static void srp_addr_to_nodeid (
623  struct totemsrp_instance *instance,
624  unsigned int *nodeid_out,
625  struct srp_addr *srp_addr_in,
626  unsigned int entries);
627 
628 static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b);
629 
630 static void memb_leave_message_send (struct totemsrp_instance *instance);
631 
632 static void token_callbacks_execute (struct totemsrp_instance *instance, enum totem_callback_token_type type);
633 static void memb_state_gather_enter (struct totemsrp_instance *instance, enum gather_state_from gather_from);
634 static void messages_deliver_to_app (struct totemsrp_instance *instance, int skip, unsigned int end_point);
635 static int orf_token_mcast (struct totemsrp_instance *instance, struct orf_token *oken,
636  int fcc_mcasts_allowed);
637 static void messages_free (struct totemsrp_instance *instance, unsigned int token_aru);
638 
639 static void memb_ring_id_set (struct totemsrp_instance *instance,
640  const struct memb_ring_id *ring_id);
641 static void target_set_completed (void *context);
642 static void memb_state_commit_token_update (struct totemsrp_instance *instance);
643 static void memb_state_commit_token_target_set (struct totemsrp_instance *instance);
644 static int memb_state_commit_token_send (struct totemsrp_instance *instance);
645 static int memb_state_commit_token_send_recovery (struct totemsrp_instance *instance, struct memb_commit_token *memb_commit_token);
646 static void memb_state_commit_token_create (struct totemsrp_instance *instance);
647 static int token_hold_cancel_send (struct totemsrp_instance *instance);
648 static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out);
649 static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out);
650 static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out);
651 static void mcast_endian_convert (const struct mcast *in, struct mcast *out);
652 static void memb_merge_detect_endian_convert (
653  const struct memb_merge_detect *in,
654  struct memb_merge_detect *out);
655 static struct srp_addr srp_addr_endian_convert (struct srp_addr in);
656 static void timer_function_orf_token_timeout (void *data);
657 static void timer_function_orf_token_warning (void *data);
658 static void timer_function_pause_timeout (void *data);
659 static void timer_function_heartbeat_timeout (void *data);
660 static void timer_function_token_retransmit_timeout (void *data);
661 static void timer_function_token_hold_retransmit_timeout (void *data);
662 static void timer_function_merge_detect_timeout (void *data);
663 static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance);
664 static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr);
665 static const char* gsfrom_to_msg(enum gather_state_from gsfrom);
666 
667 void main_deliver_fn (
668  void *context,
669  const void *msg,
670  unsigned int msg_len,
671  const struct sockaddr_storage *system_from);
672 
673 void main_iface_change_fn (
674  void *context,
675  const struct totem_ip_address *iface_address,
676  unsigned int iface_no);
677 
678 struct message_handlers totemsrp_message_handlers = {
679  6,
680  {
681  message_handler_orf_token, /* MESSAGE_TYPE_ORF_TOKEN */
682  message_handler_mcast, /* MESSAGE_TYPE_MCAST */
683  message_handler_memb_merge_detect, /* MESSAGE_TYPE_MEMB_MERGE_DETECT */
684  message_handler_memb_join, /* MESSAGE_TYPE_MEMB_JOIN */
685  message_handler_memb_commit_token, /* MESSAGE_TYPE_MEMB_COMMIT_TOKEN */
686  message_handler_token_hold_cancel /* MESSAGE_TYPE_TOKEN_HOLD_CANCEL */
687  }
688 };
689 
690 #define log_printf(level, format, args...) \
691 do { \
692  instance->totemsrp_log_printf ( \
693  level, instance->totemsrp_subsys_id, \
694  __FUNCTION__, __FILE__, __LINE__, \
695  format, ##args); \
696 } while (0);
697 #define LOGSYS_PERROR(err_num, level, fmt, args...) \
698 do { \
699  char _error_str[LOGSYS_MAX_PERROR_MSG_LEN]; \
700  const char *_error_ptr = qb_strerror_r(err_num, _error_str, sizeof(_error_str)); \
701  instance->totemsrp_log_printf ( \
702  level, instance->totemsrp_subsys_id, \
703  __FUNCTION__, __FILE__, __LINE__, \
704  fmt ": %s (%d)\n", ##args, _error_ptr, err_num); \
705  } while(0)
706 
707 static const char* gsfrom_to_msg(enum gather_state_from gsfrom)
708 {
709  if (gsfrom <= TOTEMSRP_GSFROM_MAX) {
710  return gather_state_from_desc[gsfrom];
711  }
712  else {
713  return "UNKNOWN";
714  }
715 }
716 
717 static void totemsrp_instance_initialize (struct totemsrp_instance *instance)
718 {
719  memset (instance, 0, sizeof (struct totemsrp_instance));
720 
721  qb_list_init (&instance->token_callback_received_listhead);
722 
723  qb_list_init (&instance->token_callback_sent_listhead);
724 
725  instance->my_received_flg = 1;
726 
727  instance->my_token_seq = SEQNO_START_TOKEN - 1;
728 
730 
731  instance->set_aru = -1;
732 
733  instance->my_aru = SEQNO_START_MSG;
734 
736 
738 
739  instance->orf_token_discard = 0;
740 
741  instance->originated_orf_token = 0;
742 
743  instance->commit_token = (struct memb_commit_token *)instance->commit_token_storage;
744 
745  instance->waiting_trans_ack = 1;
746 }
747 
748 static int pause_flush (struct totemsrp_instance *instance)
749 {
750  uint64_t now_msec;
751  uint64_t timestamp_msec;
752  int res = 0;
753 
754  now_msec = (qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC);
755  timestamp_msec = instance->pause_timestamp / QB_TIME_NS_IN_MSEC;
756 
757  if ((now_msec - timestamp_msec) > (instance->totem_config->token_timeout / 2)) {
759  "Process pause detected for %d ms, flushing membership messages.", (unsigned int)(now_msec - timestamp_msec));
760  /*
761  * -1 indicates an error from recvmsg
762  */
763  do {
765  } while (res == -1);
766  }
767  return (res);
768 }
769 
770 static int token_event_stats_collector (enum totem_callback_token_type type, const void *void_instance)
771 {
772  struct totemsrp_instance *instance = (struct totemsrp_instance *)void_instance;
773  uint32_t time_now;
774  unsigned long long nano_secs = qb_util_nano_current_get ();
775 
776  time_now = (nano_secs / QB_TIME_NS_IN_MSEC);
777 
778  if (type == TOTEM_CALLBACK_TOKEN_RECEIVED) {
779  /* increment the latest token index */
780  if (instance->stats.latest_token == (TOTEM_TOKEN_STATS_MAX - 1))
781  instance->stats.latest_token = 0;
782  else
783  instance->stats.latest_token++;
784 
785  if (instance->stats.earliest_token == instance->stats.latest_token) {
786  /* we have filled up the array, start overwriting */
787  if (instance->stats.earliest_token == (TOTEM_TOKEN_STATS_MAX - 1))
788  instance->stats.earliest_token = 0;
789  else
790  instance->stats.earliest_token++;
791 
792  instance->stats.token[instance->stats.earliest_token].rx = 0;
793  instance->stats.token[instance->stats.earliest_token].tx = 0;
794  instance->stats.token[instance->stats.earliest_token].backlog_calc = 0;
795  }
796 
797  instance->stats.token[instance->stats.latest_token].rx = time_now;
798  instance->stats.token[instance->stats.latest_token].tx = 0; /* in case we drop the token */
799  } else {
800  instance->stats.token[instance->stats.latest_token].tx = time_now;
801  }
802  return 0;
803 }
804 
805 static void totempg_mtu_changed(void *context, int net_mtu)
806 {
807  struct totemsrp_instance *instance = context;
808 
809  instance->totem_config->net_mtu = net_mtu - 2 * sizeof (struct mcast);
810 
812  "Net MTU changed to %d, new value is %d",
813  net_mtu, instance->totem_config->net_mtu);
814 }
815 
816 /*
817  * Exported interfaces
818  */
819 int totemsrp_initialize (
820  qb_loop_t *poll_handle,
821  void **srp_context,
822  struct totem_config *totem_config,
823  totempg_stats_t *stats,
824 
825  void (*deliver_fn) (
826  unsigned int nodeid,
827  const void *msg,
828  unsigned int msg_len,
829  int endian_conversion_required),
830 
831  void (*confchg_fn) (
832  enum totem_configuration_type configuration_type,
833  const unsigned int *member_list, size_t member_list_entries,
834  const unsigned int *left_list, size_t left_list_entries,
835  const unsigned int *joined_list, size_t joined_list_entries,
836  const struct memb_ring_id *ring_id),
837  void (*waiting_trans_ack_cb_fn) (
838  int waiting_trans_ack))
839 {
840  struct totemsrp_instance *instance;
841  int res;
842 
843  instance = malloc (sizeof (struct totemsrp_instance));
844  if (instance == NULL) {
845  goto error_exit;
846  }
847 
848  totemsrp_instance_initialize (instance);
849 
850  instance->totemsrp_waiting_trans_ack_cb_fn = waiting_trans_ack_cb_fn;
851  instance->totemsrp_waiting_trans_ack_cb_fn (1);
852 
853  stats->srp = &instance->stats;
854  instance->stats.latest_token = 0;
855  instance->stats.earliest_token = 0;
856 
857  instance->totem_config = totem_config;
858 
859  /*
860  * Configure logging
861  */
870 
871  /*
872  * Configure totem store and load functions
873  */
876 
877  /*
878  * Initialize local variables for totemsrp
879  */
881 
882  /*
883  * Display totem configuration
884  */
886  "Token Timeout (%d ms) retransmit timeout (%d ms)",
889  uint32_t token_warning_ms = totem_config->token_warning * totem_config->token_timeout / 100;
891  "Token warning every %d ms (%d%% of Token Timeout)",
892  token_warning_ms, totem_config->token_warning);
893  if (token_warning_ms < totem_config->token_retransmit_timeout)
895  "The token warning interval (%d ms) is less than the token retransmit timeout (%d ms) "
896  "which can lead to spurious token warnings. Consider increasing the token_warning parameter.",
897  token_warning_ms, totem_config->token_retransmit_timeout);
898  } else {
900  "Token warnings disabled");
901  }
903  "token hold (%d ms) retransmits before loss (%d retrans)",
906  "join (%d ms) send_join (%d ms) consensus (%d ms) merge (%d ms)",
910 
913  "downcheck (%d ms) fail to recv const (%d msgs)",
916  "seqno unchanged const (%d rotations) Maximum network MTU %d", totem_config->seqno_unchanged_const, totem_config->net_mtu);
917 
919  "window size per rotation (%d messages) maximum messages per rotation (%d messages)",
921 
923  "missed count const (%d messages)",
925 
927  "send threads (%d threads)", totem_config->threads);
928 
930  "heartbeat_failures_allowed (%d)", totem_config->heartbeat_failures_allowed);
932  "max_network_delay (%d ms)", totem_config->max_network_delay);
933 
934 
935  cs_queue_init (&instance->retrans_message_queue, RETRANS_MESSAGE_QUEUE_SIZE_MAX,
936  sizeof (struct message_item), instance->threaded_mode_enabled);
937 
938  sq_init (&instance->regular_sort_queue,
939  QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);
940 
941  sq_init (&instance->recovery_sort_queue,
942  QUEUE_RTR_ITEMS_SIZE_MAX, sizeof (struct sort_queue_item), 0);
943 
944  instance->totemsrp_poll_handle = poll_handle;
945 
946  instance->totemsrp_deliver_fn = deliver_fn;
947 
948  instance->totemsrp_confchg_fn = confchg_fn;
949  instance->use_heartbeat = 1;
950 
951  timer_function_pause_timeout (instance);
952 
955  "HeartBeat is Disabled. To enable set heartbeat_failures_allowed > 0");
956  instance->use_heartbeat = 0;
957  }
958 
959  if (instance->use_heartbeat) {
960  instance->heartbeat_timeout
963 
964  if (instance->heartbeat_timeout >= totem_config->token_timeout) {
966  "total heartbeat_timeout (%d ms) is not less than token timeout (%d ms)",
967  instance->heartbeat_timeout,
970  "heartbeat_timeout = heartbeat_failures_allowed * token_retransmit_timeout + max_network_delay");
972  "heartbeat timeout should be less than the token timeout. Heartbeat is disabled!!");
973  instance->use_heartbeat = 0;
974  }
975  else {
977  "total heartbeat_timeout (%d ms)", instance->heartbeat_timeout);
978  }
979  }
980 
981  res = totemnet_initialize (
982  poll_handle,
983  &instance->totemnet_context,
984  totem_config,
985  stats->srp,
986  instance,
989  totempg_mtu_changed,
990  target_set_completed);
991  if (res == -1) {
992  goto error_exit;
993  }
994 
995  instance->my_id.nodeid = instance->totem_config->interfaces[instance->lowest_active_if].boundto.nodeid;
996 
997  /*
998  * Must have net_mtu adjusted by totemnet_initialize first
999  */
1000  cs_queue_init (&instance->new_message_queue,
1002  sizeof (struct message_item), instance->threaded_mode_enabled);
1003 
1004  cs_queue_init (&instance->new_message_queue_trans,
1006  sizeof (struct message_item), instance->threaded_mode_enabled);
1007 
1009  &instance->token_recv_event_handle,
1011  0,
1012  token_event_stats_collector,
1013  instance);
1015  &instance->token_sent_event_handle,
1017  0,
1018  token_event_stats_collector,
1019  instance);
1020  *srp_context = instance;
1021  return (0);
1022 
1023 error_exit:
1024  return (-1);
1025 }
1026 
1027 void totemsrp_finalize (
1028  void *srp_context)
1029 {
1030  struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
1031 
1032  memb_leave_message_send (instance);
1033  totemnet_finalize (instance->totemnet_context);
1034  cs_queue_free (&instance->new_message_queue);
1035  cs_queue_free (&instance->new_message_queue_trans);
1036  cs_queue_free (&instance->retrans_message_queue);
1037  sq_free (&instance->regular_sort_queue);
1038  sq_free (&instance->recovery_sort_queue);
1039  free (instance);
1040 }
1041 
1042 /*
1043  * Return configured interfaces. interfaces is an array of totem_ip addresses allocated by the
1044  * caller, with interfaces_size items. iface_count is the final number of interfaces filled in
1045  * by this function.
1046  *
1047  * The function returns 0 on success, -2 if the interfaces array is not big enough, and -1 if
1048  * the interface was not found.
1049  */
1050 int totemsrp_ifaces_get (
1051  void *srp_context,
1052  unsigned int nodeid,
1053  unsigned int *interface_id,
1054  struct totem_ip_address *interfaces,
1055  unsigned int interfaces_size,
1056  char ***status,
1057  unsigned int *iface_count)
1058 {
1059  struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
1060  struct totem_ip_address *iface_ptr = interfaces;
1061  int res = 0;
1062  int i,n;
1063  int num_ifs = 0;
1064 
1065  memset(interfaces, 0, sizeof(struct totem_ip_address) * interfaces_size);
1066  *iface_count = INTERFACE_MAX;
1067 
1068  for (i=0; i<INTERFACE_MAX; i++) {
1069  for (n=0; n < instance->totem_config->interfaces[i].member_count; n++) {
1070  if (instance->totem_config->interfaces[i].configured &&
1071  instance->totem_config->interfaces[i].member_list[n].nodeid == nodeid) {
1072  memcpy(iface_ptr, &instance->totem_config->interfaces[i].member_list[n], sizeof(struct totem_ip_address));
1073  interface_id[num_ifs] = i;
1074  iface_ptr++;
1075  if (++num_ifs > interfaces_size) {
1076  res = -2;
1077  break;
1078  }
1079  }
1080  }
1081  }
1082 
1083  totemnet_ifaces_get(instance->totemnet_context, status, iface_count);
1084  *iface_count = num_ifs;
1085  return (res);
1086 }
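/*
 * Editorial sketch, not part of the original totemsrp.c: a hypothetical
 * caller of totemsrp_ifaces_get() passing caller-allocated arrays sized to
 * INTERFACE_MAX, as described in the comment above the function.  The
 * wrapper name is ours.
 */
static int ifaces_get_example (void *srp_context, unsigned int nodeid)
{
	struct totem_ip_address ifaces[INTERFACE_MAX];
	unsigned int iface_ids[INTERFACE_MAX];
	unsigned int iface_count = 0;
	char **status = NULL;

	/* 0 on success, -1 if the node was not found, -2 if the array is too small */
	return totemsrp_ifaces_get (srp_context, nodeid, iface_ids,
		ifaces, INTERFACE_MAX, &status, &iface_count);
}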
1087 
1088 int totemsrp_crypto_set (
1089  void *srp_context,
1090  const char *cipher_type,
1091  const char *hash_type)
1092 {
1093  struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
1094  int res;
1095 
1096  res = totemnet_crypto_set(instance->totemnet_context, cipher_type, hash_type);
1097 
1098  return (res);
1099 }
1100 
1101 
1102 unsigned int totemsrp_my_nodeid_get (
1103  void *srp_context)
1104 {
1105  struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
1106  unsigned int res;
1107 
1108  res = instance->my_id.nodeid;
1109 
1110  return (res);
1111 }
1112 
1113 int totemsrp_my_family_get (
1114  void *srp_context)
1115 {
1116  struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
1117  int res;
1118 
1119  res = instance->totem_config->interfaces[instance->lowest_active_if].boundto.family;
1120 
1121  return (res);
1122 }
1123 
1124 
1125 /*
1126  * Set operations for use by the membership algorithm
1127  */
1128 static int srp_addr_equal (const struct srp_addr *a, const struct srp_addr *b)
1129 {
1130  if (a->nodeid == b->nodeid) {
1131  return 1;
1132  }
1133  return 0;
1134 }
1135 
1136 static void srp_addr_to_nodeid (
1137  struct totemsrp_instance *instance,
1138  unsigned int *nodeid_out,
1139  struct srp_addr *srp_addr_in,
1140  unsigned int entries)
1141 {
1142  unsigned int i;
1143 
1144  for (i = 0; i < entries; i++) {
1145  nodeid_out[i] = srp_addr_in[i].nodeid;
1146  }
1147 }
1148 
1149 static struct srp_addr srp_addr_endian_convert (struct srp_addr in)
1150 {
1151  struct srp_addr res;
1152 
1153  res.nodeid = swab32 (in.nodeid);
1154 
1155  return (res);
1156 }
1157 
1158 static void memb_consensus_reset (struct totemsrp_instance *instance)
1159 {
1160  instance->consensus_list_entries = 0;
1161 }
1162 
1163 static void memb_set_subtract (
1164  struct srp_addr *out_list, int *out_list_entries,
1165  struct srp_addr *one_list, int one_list_entries,
1166  struct srp_addr *two_list, int two_list_entries)
1167 {
1168  int found = 0;
1169  int i;
1170  int j;
1171 
1172  *out_list_entries = 0;
1173 
1174  for (i = 0; i < one_list_entries; i++) {
1175  for (j = 0; j < two_list_entries; j++) {
1176  if (srp_addr_equal (&one_list[i], &two_list[j])) {
1177  found = 1;
1178  break;
1179  }
1180  }
1181  if (found == 0) {
1182  out_list[*out_list_entries] = one_list[i];
1183  *out_list_entries = *out_list_entries + 1;
1184  }
1185  found = 0;
1186  }
1187 }
1188 
1189 /*
1190  * Set consensus for a specific processor
1191  */
1192 static void memb_consensus_set (
1193  struct totemsrp_instance *instance,
1194  const struct srp_addr *addr)
1195 {
1196  int found = 0;
1197  int i;
1198 
1199  for (i = 0; i < instance->consensus_list_entries; i++) {
1200  if (srp_addr_equal(addr, &instance->consensus_list[i].addr)) {
1201  found = 1;
1202  break; /* found entry */
1203  }
1204  }
1205  instance->consensus_list[i].addr = *addr;
1206  instance->consensus_list[i].set = 1;
1207  if (found == 0) {
1208  instance->consensus_list_entries++;
1209  }
1210  return;
1211 }
1212 
1213 /*
1214  * Is consensus set for a specific processor
1215  */
1216 static int memb_consensus_isset (
1217  struct totemsrp_instance *instance,
1218  const struct srp_addr *addr)
1219 {
1220  int i;
1221 
1222  for (i = 0; i < instance->consensus_list_entries; i++) {
1223  if (srp_addr_equal (addr, &instance->consensus_list[i].addr)) {
1224  return (instance->consensus_list[i].set);
1225  }
1226  }
1227  return (0);
1228 }
1229 
1230 /*
1231  * Is consensus agreed upon based upon consensus database
1232  */
1233 static int memb_consensus_agreed (
1234  struct totemsrp_instance *instance)
1235 {
1236  struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
1237  int token_memb_entries = 0;
1238  int agreed = 1;
1239  int i;
1240 
1241  memb_set_subtract (token_memb, &token_memb_entries,
1242  instance->my_proc_list, instance->my_proc_list_entries,
1243  instance->my_failed_list, instance->my_failed_list_entries);
1244 
1245  for (i = 0; i < token_memb_entries; i++) {
1246  if (memb_consensus_isset (instance, &token_memb[i]) == 0) {
1247  agreed = 0;
1248  break;
1249  }
1250  }
1251 
1252  if (agreed && instance->failed_to_recv == 1) {
1253  /*
1254  * Both nodes agreed on our failure. We don't care how many proc list items are left because
1255  * we will create a single ring anyway.
1256  */
1257 
1258  return (agreed);
1259  }
1260 
1261  assert (token_memb_entries >= 1);
1262 
1263  return (agreed);
1264 }
1265 
1266 static void memb_consensus_notset (
1267  struct totemsrp_instance *instance,
1268  struct srp_addr *no_consensus_list,
1269  int *no_consensus_list_entries,
1270  struct srp_addr *comparison_list,
1271  int comparison_list_entries)
1272 {
1273  int i;
1274 
1275  *no_consensus_list_entries = 0;
1276 
1277  for (i = 0; i < instance->my_proc_list_entries; i++) {
1278  if (memb_consensus_isset (instance, &instance->my_proc_list[i]) == 0) {
1279  no_consensus_list[*no_consensus_list_entries] = instance->my_proc_list[i];
1280  *no_consensus_list_entries = *no_consensus_list_entries + 1;
1281  }
1282  }
1283 }
1284 
1285 /*
1286  * Is set1 equal to set2? Entries can be in different orders
1287  */
1288 static int memb_set_equal (
1289  struct srp_addr *set1, int set1_entries,
1290  struct srp_addr *set2, int set2_entries)
1291 {
1292  int i;
1293  int j;
1294 
1295  int found = 0;
1296 
1297  if (set1_entries != set2_entries) {
1298  return (0);
1299  }
1300  for (i = 0; i < set2_entries; i++) {
1301  for (j = 0; j < set1_entries; j++) {
1302  if (srp_addr_equal (&set1[j], &set2[i])) {
1303  found = 1;
1304  break;
1305  }
1306  }
1307  if (found == 0) {
1308  return (0);
1309  }
1310  found = 0;
1311  }
1312  return (1);
1313 }
1314 
1315 /*
1316  * Is subset fully contained in fullset
1317  */
1318 static int memb_set_subset (
1319  const struct srp_addr *subset, int subset_entries,
1320  const struct srp_addr *fullset, int fullset_entries)
1321 {
1322  int i;
1323  int j;
1324  int found = 0;
1325 
1326  if (subset_entries > fullset_entries) {
1327  return (0);
1328  }
1329  for (i = 0; i < subset_entries; i++) {
1330  for (j = 0; j < fullset_entries; j++) {
1331  if (srp_addr_equal (&subset[i], &fullset[j])) {
1332  found = 1;
1333  }
1334  }
1335  if (found == 0) {
1336  return (0);
1337  }
1338  found = 0;
1339  }
1340  return (1);
1341 }
1342 /*
1343  * merge subset into fullset taking care not to add duplicates
1344  */
1345 static void memb_set_merge (
1346  const struct srp_addr *subset, int subset_entries,
1347  struct srp_addr *fullset, int *fullset_entries)
1348 {
1349  int found = 0;
1350  int i;
1351  int j;
1352 
1353  for (i = 0; i < subset_entries; i++) {
1354  for (j = 0; j < *fullset_entries; j++) {
1355  if (srp_addr_equal (&fullset[j], &subset[i])) {
1356  found = 1;
1357  break;
1358  }
1359  }
1360  if (found == 0) {
1361  fullset[*fullset_entries] = subset[i];
1362  *fullset_entries = *fullset_entries + 1;
1363  }
1364  found = 0;
1365  }
1366  return;
1367 }
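/*
 * Editorial sketch, not part of the original totemsrp.c: how the set
 * helpers above are combined by the membership code.  For example, the
 * "joined" list is the new membership minus the transitional membership,
 * which is what memb_state_operational_enter() computes with
 * memb_set_subtract().  The wrapper below is illustrative only.
 */
static void memb_joined_list_example (
	struct totemsrp_instance *instance,
	struct srp_addr *joined_list, int *joined_list_entries)
{
	memb_set_subtract (joined_list, joined_list_entries,
		instance->my_new_memb_list, instance->my_new_memb_entries,
		instance->my_trans_memb_list, instance->my_trans_memb_entries);
}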
1368 
1369 static void memb_set_and_with_ring_id (
1370  struct srp_addr *set1,
1371  struct memb_ring_id *set1_ring_ids,
1372  int set1_entries,
1373  struct srp_addr *set2,
1374  int set2_entries,
1375  struct memb_ring_id *old_ring_id,
1376  struct srp_addr *and,
1377  int *and_entries)
1378 {
1379  int i;
1380  int j;
1381  int found = 0;
1382 
1383  *and_entries = 0;
1384 
1385  for (i = 0; i < set2_entries; i++) {
1386  for (j = 0; j < set1_entries; j++) {
1387  if (srp_addr_equal (&set1[j], &set2[i])) {
1388  if (memcmp (&set1_ring_ids[j], old_ring_id, sizeof (struct memb_ring_id)) == 0) {
1389  found = 1;
1390  }
1391  break;
1392  }
1393  }
1394  if (found) {
1395  and[*and_entries] = set1[j];
1396  *and_entries = *and_entries + 1;
1397  }
1398  found = 0;
1399  }
1400  return;
1401 }
1402 
1403 static void memb_set_log(
1404  struct totemsrp_instance *instance,
1405  int level,
1406  const char *string,
1407  struct srp_addr *list,
1408  int list_entries)
1409 {
1410  char int_buf[32];
1411  char list_str[512];
1412  int i;
1413 
1414  memset(list_str, 0, sizeof(list_str));
1415 
1416  for (i = 0; i < list_entries; i++) {
1417  if (i == 0) {
1418  snprintf(int_buf, sizeof(int_buf), CS_PRI_NODE_ID, list[i].nodeid);
1419  } else {
1420  snprintf(int_buf, sizeof(int_buf), "," CS_PRI_NODE_ID, list[i].nodeid);
1421  }
1422 
1423  if (strlen(list_str) + strlen(int_buf) >= sizeof(list_str)) {
1424  break ;
1425  }
1426  strcat(list_str, int_buf);
1427  }
1428 
1429  log_printf(level, "List '%s' contains %d entries: %s", string, list_entries, list_str);
1430 }
1431 
1432 static void my_leave_memb_clear(
1433  struct totemsrp_instance *instance)
1434 {
1435  memset(instance->my_leave_memb_list, 0, sizeof(instance->my_leave_memb_list));
1436  instance->my_leave_memb_entries = 0;
1437 }
1438 
1439 static unsigned int my_leave_memb_match(
1440  struct totemsrp_instance *instance,
1441  unsigned int nodeid)
1442 {
1443  int i;
1444  unsigned int ret = 0;
1445 
1446  for (i = 0; i < instance->my_leave_memb_entries; i++){
1447  if (instance->my_leave_memb_list[i] == nodeid){
1448  ret = nodeid;
1449  break;
1450  }
1451  }
1452  return ret;
1453 }
1454 
1455 static void my_leave_memb_set(
1456  struct totemsrp_instance *instance,
1457  unsigned int nodeid)
1458 {
1459  int i, found = 0;
1460  for (i = 0; i < instance->my_leave_memb_entries; i++){
1461  if (instance->my_leave_memb_list[i] == nodeid){
1462  found = 1;
1463  break;
1464  }
1465  }
1466  if (found == 1) {
1467  return;
1468  }
1469  if (instance->my_leave_memb_entries < (PROCESSOR_COUNT_MAX - 1)) {
1470  instance->my_leave_memb_list[instance->my_leave_memb_entries] = nodeid;
1471  instance->my_leave_memb_entries++;
1472  } else {
1474  "Cannot set LEAVE nodeid=" CS_PRI_NODE_ID, nodeid);
1475  }
1476 }
1477 
1478 
1479 static void *totemsrp_buffer_alloc (struct totemsrp_instance *instance)
1480 {
1481  assert (instance != NULL);
1482  return totemnet_buffer_alloc (instance->totemnet_context);
1483 }
1484 
1485 static void totemsrp_buffer_release (struct totemsrp_instance *instance, void *ptr)
1486 {
1487  assert (instance != NULL);
1488  totemnet_buffer_release (instance->totemnet_context, ptr);
1489 }
1490 
1491 static void reset_token_retransmit_timeout (struct totemsrp_instance *instance)
1492 {
1493  int32_t res;
1494 
1495  qb_loop_timer_del (instance->totemsrp_poll_handle,
1497  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
1498  QB_LOOP_MED,
1499  instance->totem_config->token_retransmit_timeout*QB_TIME_NS_IN_MSEC,
1500  (void *)instance,
1501  timer_function_token_retransmit_timeout,
1502  &instance->timer_orf_token_retransmit_timeout);
1503  if (res != 0) {
1504  log_printf(instance->totemsrp_log_level_error, "reset_token_retransmit_timeout - qb_loop_timer_add error : %d", res);
1505  }
1506 
1507 }
1508 
1509 static void start_merge_detect_timeout (struct totemsrp_instance *instance)
1510 {
1511  int32_t res;
1512 
1513  if (instance->my_merge_detect_timeout_outstanding == 0) {
1514  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
1515  QB_LOOP_MED,
1516  instance->totem_config->merge_timeout*QB_TIME_NS_IN_MSEC,
1517  (void *)instance,
1518  timer_function_merge_detect_timeout,
1519  &instance->timer_merge_detect_timeout);
1520  if (res != 0) {
1521  log_printf(instance->totemsrp_log_level_error, "start_merge_detect_timeout - qb_loop_timer_add error : %d", res);
1522  }
1523 
1525  }
1526 }
1527 
1528 static void cancel_merge_detect_timeout (struct totemsrp_instance *instance)
1529 {
1530  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_merge_detect_timeout);
1532 }
1533 
1534 /*
1535  * ring_state_* is used to save and restore the sort queue
1536  * state when a recovery operation fails (and enters gather)
1537  */
1538 static void old_ring_state_save (struct totemsrp_instance *instance)
1539 {
1540  if (instance->old_ring_state_saved == 0) {
1541  instance->old_ring_state_saved = 1;
1542  memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
1543  sizeof (struct memb_ring_id));
1544  instance->old_ring_state_aru = instance->my_aru;
1547  "Saving state aru %x high seq received %x",
1548  instance->my_aru, instance->my_high_seq_received);
1549  }
1550 }
1551 
1552 static void old_ring_state_restore (struct totemsrp_instance *instance)
1553 {
1554  instance->my_aru = instance->old_ring_state_aru;
1557  "Restoring instance->my_aru %x my high seq received %x",
1558  instance->my_aru, instance->my_high_seq_received);
1559 }
1560 
1561 static void old_ring_state_reset (struct totemsrp_instance *instance)
1562 {
1564  "Resetting old ring state");
1565  instance->old_ring_state_saved = 0;
1566 }
1567 
1568 static void reset_pause_timeout (struct totemsrp_instance *instance)
1569 {
1570  int32_t res;
1571 
1572  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_pause_timeout);
1573  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
1574  QB_LOOP_MED,
1575  instance->totem_config->token_timeout * QB_TIME_NS_IN_MSEC / 5,
1576  (void *)instance,
1577  timer_function_pause_timeout,
1578  &instance->timer_pause_timeout);
1579  if (res != 0) {
1580  log_printf(instance->totemsrp_log_level_error, "reset_pause_timeout - qb_loop_timer_add error : %d", res);
1581  }
1582 }
1583 
1584 static void reset_token_warning (struct totemsrp_instance *instance) {
1585  int32_t res;
1586 
1587  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_warning);
1588  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
1589  QB_LOOP_MED,
1590  instance->totem_config->token_warning * instance->totem_config->token_timeout / 100 * QB_TIME_NS_IN_MSEC,
1591  (void *)instance,
1592  timer_function_orf_token_warning,
1593  &instance->timer_orf_token_warning);
1594  if (res != 0) {
1595  log_printf(instance->totemsrp_log_level_error, "reset_token_warning - qb_loop_timer_add error : %d", res);
1596  }
1597 }
1598 
1599 static void reset_token_timeout (struct totemsrp_instance *instance) {
1600  int32_t res;
1601 
1602  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout);
1603  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
1604  QB_LOOP_MED,
1605  instance->totem_config->token_timeout*QB_TIME_NS_IN_MSEC,
1606  (void *)instance,
1607  timer_function_orf_token_timeout,
1608  &instance->timer_orf_token_timeout);
1609  if (res != 0) {
1610  log_printf(instance->totemsrp_log_level_error, "reset_token_timeout - qb_loop_timer_add error : %d", res);
1611  }
1612 
1613  if (instance->totem_config->token_warning)
1614  reset_token_warning(instance);
1615 }
1616 
1617 static void reset_heartbeat_timeout (struct totemsrp_instance *instance) {
1618  int32_t res;
1619 
1620  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout);
1621  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
1622  QB_LOOP_MED,
1623  instance->heartbeat_timeout*QB_TIME_NS_IN_MSEC,
1624  (void *)instance,
1625  timer_function_heartbeat_timeout,
1626  &instance->timer_heartbeat_timeout);
1627  if (res != 0) {
1628  log_printf(instance->totemsrp_log_level_error, "reset_heartbeat_timeout - qb_loop_timer_add error : %d", res);
1629  }
1630 }
1631 
1632 
1633 static void cancel_token_warning (struct totemsrp_instance *instance) {
1634  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_warning);
1635 }
1636 
1637 static void cancel_token_timeout (struct totemsrp_instance *instance) {
1638  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_timeout);
1639 
1640  if (instance->totem_config->token_warning)
1641  cancel_token_warning(instance);
1642 }
1643 
1644 static void cancel_heartbeat_timeout (struct totemsrp_instance *instance) {
1645  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_heartbeat_timeout);
1646 }
1647 
1648 static void cancel_token_retransmit_timeout (struct totemsrp_instance *instance)
1649 {
1650  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->timer_orf_token_retransmit_timeout);
1651 }
1652 
1653 static void start_token_hold_retransmit_timeout (struct totemsrp_instance *instance)
1654 {
1655  int32_t res;
1656 
1657  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
1658  QB_LOOP_MED,
1659  instance->totem_config->token_hold_timeout*QB_TIME_NS_IN_MSEC,
1660  (void *)instance,
1661  timer_function_token_hold_retransmit_timeout,
1662  &instance->timer_orf_token_hold_retransmit_timeout);
1663  if (res != 0) {
1664  log_printf(instance->totemsrp_log_level_error, "start_token_hold_retransmit_timeout - qb_loop_timer_add error : %d", res);
1665  }
1666 }
1667 
1668 static void cancel_token_hold_retransmit_timeout (struct totemsrp_instance *instance)
1669 {
1670  qb_loop_timer_del (instance->totemsrp_poll_handle,
1672 }
1673 
1674 static void memb_state_consensus_timeout_expired (
1675  struct totemsrp_instance *instance)
1676 {
1677  struct srp_addr no_consensus_list[PROCESSOR_COUNT_MAX];
1678  int no_consensus_list_entries;
1679 
1680  instance->stats.consensus_timeouts++;
1681  if (memb_consensus_agreed (instance)) {
1682  memb_consensus_reset (instance);
1683 
1684  memb_consensus_set (instance, &instance->my_id);
1685 
1686  reset_token_timeout (instance); // REVIEWED
1687  } else {
1688  memb_consensus_notset (
1689  instance,
1690  no_consensus_list,
1691  &no_consensus_list_entries,
1692  instance->my_proc_list,
1693  instance->my_proc_list_entries);
1694 
1695  memb_set_merge (no_consensus_list, no_consensus_list_entries,
1696  instance->my_failed_list, &instance->my_failed_list_entries);
1697  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT);
1698  }
1699 }
1700 
1701 static void memb_join_message_send (struct totemsrp_instance *instance);
1702 
1703 static void memb_merge_detect_transmit (struct totemsrp_instance *instance);
1704 
1705 /*
1706  * Timers used for various states of the membership algorithm
1707  */
1708 static void timer_function_pause_timeout (void *data)
1709 {
1710  struct totemsrp_instance *instance = data;
1711 
1712  instance->pause_timestamp = qb_util_nano_current_get ();
1713  reset_pause_timeout (instance);
1714 }
1715 
1716 static void memb_recovery_state_token_loss (struct totemsrp_instance *instance)
1717 {
1718  old_ring_state_restore (instance);
1719  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE);
1720  instance->stats.recovery_token_lost++;
1721 }
1722 
1723 static void timer_function_orf_token_warning (void *data)
1724 {
1725  struct totemsrp_instance *instance = data;
1726  uint64_t tv_diff;
1727 
1728  /* need to protect against the case where token_warning is set to 0 dynamically */
1729  if (instance->totem_config->token_warning) {
1730  tv_diff = qb_util_nano_current_get () / QB_TIME_NS_IN_MSEC -
1731  instance->stats.token[instance->stats.latest_token].rx;
1733  "Token has not been received in %d ms ", (unsigned int) tv_diff);
1734  reset_token_warning(instance);
1735  } else {
1736  cancel_token_warning(instance);
1737  }
1738 }
1739 
1740 static void timer_function_orf_token_timeout (void *data)
1741 {
1742  struct totemsrp_instance *instance = data;
1743 
1744  switch (instance->memb_state) {
1747  "The token was lost in the OPERATIONAL state.");
1749  "A processor failed, forming new configuration:"
1750  " token timed out (%ums), waiting %ums for consensus.",
1751  instance->totem_config->token_timeout,
1752  instance->totem_config->consensus_timeout);
1754  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE);
1755  instance->stats.operational_token_lost++;
1756  break;
1757 
1758  case MEMB_STATE_GATHER:
1760  "The consensus timeout expired (%ums).",
1761  instance->totem_config->consensus_timeout);
1762  memb_state_consensus_timeout_expired (instance);
1763  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED);
1764  instance->stats.gather_token_lost++;
1765  break;
1766 
1767  case MEMB_STATE_COMMIT:
1769  "The token was lost in the COMMIT state.");
1770  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE);
1771  instance->stats.commit_token_lost++;
1772  break;
1773 
1774  case MEMB_STATE_RECOVERY:
1776  "The token was lost in the RECOVERY state.");
1777  memb_recovery_state_token_loss (instance);
1778  instance->orf_token_discard = 1;
1779  break;
1780  }
1781 }
1782 
1783 static void timer_function_heartbeat_timeout (void *data)
1784 {
1785  struct totemsrp_instance *instance = data;
1787  "HeartBeat Timer expired Invoking token loss mechanism in state %d ", instance->memb_state);
1788  timer_function_orf_token_timeout(data);
1789 }
1790 
1791 static void memb_timer_function_state_gather (void *data)
1792 {
1793  struct totemsrp_instance *instance = data;
1794  int32_t res;
1795 
1796  switch (instance->memb_state) {
1798  case MEMB_STATE_RECOVERY:
1799  assert (0); /* this should never happen */
1800  break;
1801  case MEMB_STATE_GATHER:
1802  case MEMB_STATE_COMMIT:
1803  memb_join_message_send (instance);
1804 
1805  /*
1806  * Restart the join timeout
1807  */
1808  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);
1809 
1810  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
1811  QB_LOOP_MED,
1812  instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC,
1813  (void *)instance,
1814  memb_timer_function_state_gather,
1815  &instance->memb_timer_state_gather_join_timeout);
1816 
1817  if (res != 0) {
1818  log_printf(instance->totemsrp_log_level_error, "memb_timer_function_state_gather - qb_loop_timer_add error : %d", res);
1819  }
1820  break;
1821  }
1822 }
1823 
1824 static void memb_timer_function_gather_consensus_timeout (void *data)
1825 {
1826  struct totemsrp_instance *instance = data;
1827  memb_state_consensus_timeout_expired (instance);
1828 }
1829 
1830 static void deliver_messages_from_recovery_to_regular (struct totemsrp_instance *instance)
1831 {
1832  unsigned int i;
1833  struct sort_queue_item *recovery_message_item;
1834  struct sort_queue_item regular_message_item;
1835  unsigned int range = 0;
1836  int res;
1837  void *ptr;
1838  struct mcast *mcast;
1839 
1841  "recovery to regular %x-%x", SEQNO_START_MSG + 1, instance->my_aru);
1842 
1843  range = instance->my_aru - SEQNO_START_MSG;
1844  /*
1845  * Move messages from recovery to regular sort queue
1846  */
1847 // todo should i be initialized to 0 or 1 ?
1848  for (i = 1; i <= range; i++) {
1849  res = sq_item_get (&instance->recovery_sort_queue,
1850  i + SEQNO_START_MSG, &ptr);
1851  if (res != 0) {
1852  continue;
1853  }
1854  recovery_message_item = ptr;
1855 
1856  /*
1857  * Convert recovery message into regular message
1858  */
1859  mcast = recovery_message_item->mcast;
1861  /*
1862  * Message is a recovery message encapsulated
1863  * in a new ring message
1864  */
1865  regular_message_item.mcast =
1866  (struct mcast *)(((char *)recovery_message_item->mcast) + sizeof (struct mcast));
1867  regular_message_item.msg_len =
1868  recovery_message_item->msg_len - sizeof (struct mcast);
1869  mcast = regular_message_item.mcast;
1870  } else {
1871  /*
1872  * TODO this case shouldn't happen
1873  */
1874  continue;
1875  }
1876 
1878  "comparing if ring id is for this processors old ring seqno " CS_PRI_RING_ID_SEQ,
1879  (uint64_t)mcast->seq);
1880 
1881  /*
1882  * Only add this message to the regular sort
1883  * queue if it was originated with the same ring
1884  * id as the previous ring
1885  */
1886  if (memcmp (&instance->my_old_ring_id, &mcast->ring_id,
1887  sizeof (struct memb_ring_id)) == 0) {
1888 
1889  res = sq_item_inuse (&instance->regular_sort_queue, mcast->seq);
1890  if (res == 0) {
1891  sq_item_add (&instance->regular_sort_queue,
1892  &regular_message_item, mcast->seq);
1893  if (sq_lt_compare (instance->old_ring_state_high_seq_received, mcast->seq)) {
1895  }
1896  }
1897  } else {
1899  "-not adding msg with seq no " CS_PRI_RING_ID_SEQ, (uint64_t)mcast->seq);
1900  }
1901  }
1902 }
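/*
 * Editorial note, not part of the original totemsrp.c: layout of an
 * encapsulated recovery message as unwrapped by the function above.  The
 * recovery copy prepends a new-ring struct mcast header to the original
 * old-ring message, so stepping past sizeof (struct mcast) recovers the
 * original message together with its old ring id:
 *
 *   +------------------------+---------------------------+------------------+
 *   | struct mcast           | struct mcast              | application      |
 *   | (new ring header)      | (original old-ring header)| payload          |
 *   +------------------------+---------------------------+------------------+
 *   ^ recovery_message_item->mcast
 *                            ^ regular_message_item.mcast
 */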
1903 
1904 /*
1905  * Change states in the state machine of the membership algorithm
1906  */
1907 static void memb_state_operational_enter (struct totemsrp_instance *instance)
1908 {
1909  struct srp_addr joined_list[PROCESSOR_COUNT_MAX];
1910  int joined_list_entries = 0;
1911  unsigned int aru_save;
1912  unsigned int joined_list_totemip[PROCESSOR_COUNT_MAX];
1913  unsigned int trans_memb_list_totemip[PROCESSOR_COUNT_MAX];
1914  unsigned int new_memb_list_totemip[PROCESSOR_COUNT_MAX];
1915  unsigned int left_list[PROCESSOR_COUNT_MAX];
1916  unsigned int i;
1917  unsigned int res;
1918  char left_node_msg[1024];
1919  char joined_node_msg[1024];
1920  char failed_node_msg[1024];
1921 
1922  instance->originated_orf_token = 0;
1923 
1924  memb_consensus_reset (instance);
1925 
1926  old_ring_state_reset (instance);
1927 
1928  deliver_messages_from_recovery_to_regular (instance);
1929 
1931  "Delivering to app %x to %x",
1932  instance->my_high_delivered + 1, instance->old_ring_state_high_seq_received);
1933 
1934  aru_save = instance->my_aru;
1935  instance->my_aru = instance->old_ring_state_aru;
1936 
1937  messages_deliver_to_app (instance, 0, instance->old_ring_state_high_seq_received);
1938 
1939  /*
1940  * Calculate joined and left list
1941  */
1942  memb_set_subtract (instance->my_left_memb_list,
1943  &instance->my_left_memb_entries,
1944  instance->my_memb_list, instance->my_memb_entries,
1945  instance->my_trans_memb_list, instance->my_trans_memb_entries);
1946 
1947  memb_set_subtract (joined_list, &joined_list_entries,
1948  instance->my_new_memb_list, instance->my_new_memb_entries,
1949  instance->my_trans_memb_list, instance->my_trans_memb_entries);
1950 
1951  /*
1952  * Install new membership
1953  */
1954  instance->my_memb_entries = instance->my_new_memb_entries;
1955  memcpy (&instance->my_memb_list, instance->my_new_memb_list,
1956  sizeof (struct srp_addr) * instance->my_memb_entries);
1957  instance->last_released = 0;
1958  instance->my_set_retrans_flg = 0;
1959 
1960  /*
1961  * Deliver transitional configuration to application
1962  */
1963  srp_addr_to_nodeid (instance, left_list, instance->my_left_memb_list,
1964  instance->my_left_memb_entries);
1965  srp_addr_to_nodeid (instance, trans_memb_list_totemip,
1966  instance->my_trans_memb_list, instance->my_trans_memb_entries);
1968  trans_memb_list_totemip, instance->my_trans_memb_entries,
1969  left_list, instance->my_left_memb_entries,
1970  0, 0, &instance->my_ring_id);
1971  instance->waiting_trans_ack = 1;
1972  instance->totemsrp_waiting_trans_ack_cb_fn (1);
1973 
1974 // TODO we need to filter to ensure we only deliver those
1975 // messages which are part of instance->my_deliver_memb
1976  messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received);
1977 
1978  instance->my_aru = aru_save;
1979 
1980  /*
1981  * Deliver regular configuration to application
1982  */
1983  srp_addr_to_nodeid (instance, new_memb_list_totemip,
1984  instance->my_new_memb_list, instance->my_new_memb_entries);
1985  srp_addr_to_nodeid (instance, joined_list_totemip, joined_list,
1986  joined_list_entries);
1988  new_memb_list_totemip, instance->my_new_memb_entries,
1989  0, 0,
1990  joined_list_totemip, joined_list_entries, &instance->my_ring_id);
1991 
1992  /*
1993  * The recovery sort queue now becomes the regular
1994  * sort queue. It is necessary to copy the state
1995  * into the regular sort queue.
1996  */
1997  sq_copy (&instance->regular_sort_queue, &instance->recovery_sort_queue);
1998  instance->my_last_aru = SEQNO_START_MSG;
1999 
2000  /* When making my_proc_list smaller, ensure that the
2001  * now unused entries are zeroed out. There are some suspect
2002  * asserts that assume that there are always 2 entries in the list.
2003  * These fail when my_proc_list is reduced to 1 entry (and the
2004  * valid [0] entry is the same as the 'unused' [1] entry).
2005  */
2006  memset(instance->my_proc_list, 0,
2007  sizeof (struct srp_addr) * instance->my_proc_list_entries);
2008 
2009  instance->my_proc_list_entries = instance->my_new_memb_entries;
2010  memcpy (instance->my_proc_list, instance->my_new_memb_list,
2011  sizeof (struct srp_addr) * instance->my_memb_entries);
2012 
2013  instance->my_failed_list_entries = 0;
2014  /*
2015  * TODO Not exactly to spec
2016  *
2017  * At the entry to this function all messages without a gap are
2018  * delivered.
2019  *
2020  * This code throws away messages from the last gap in the sort queue
2021  * to my_high_seq_received
2022  *
2023  * What should really happen is we should deliver all messages up to
2024  * a gap, then deliver the transitional configuration, then deliver
2025  * the messages between the first gap and my_high_seq_received, and
2026  * then deliver the regular
2027  * configuration
2028  *
2029  * Unfortunately totempg doesn't appear to like this operating mode
2030  * which needs more inspection
2031  */
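 /*
  * The loop below scans downward from my_high_seq_received for the highest
  * sequence number still present in the regular sort queue; that position
  * becomes my_high_delivered for the newly installed ring.
  */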
2032  i = instance->my_high_seq_received + 1;
2033  do {
2034  void *ptr;
2035 
2036  i -= 1;
2037  res = sq_item_get (&instance->regular_sort_queue, i, &ptr);
2038  if (i == 0) {
2039  break;
2040  }
2041  } while (res);
2042 
2043  instance->my_high_delivered = i;
2044 
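 /*
  * Free the message bodies up to the new my_high_delivered point and
  * release their sort queue slots; they are not retransmitted on the
  * new ring.
  */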
2045  for (i = 0; i <= instance->my_high_delivered; i++) {
2046  void *ptr;
2047 
2048  res = sq_item_get (&instance->regular_sort_queue, i, &ptr);
2049  if (res == 0) {
2050  struct sort_queue_item *regular_message;
2051 
2052  regular_message = ptr;
2053  free (regular_message->mcast);
2054  }
2055  }
2056  sq_items_release (&instance->regular_sort_queue, instance->my_high_delivered);
2057  instance->last_released = instance->my_high_delivered;
2058 
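 /*
  * Build human readable joined/left/failed node lists for the membership
  * change log messages emitted below.
  */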
2059  if (joined_list_entries) {
2060  int sptr = 0;
2061  sptr += snprintf(joined_node_msg, sizeof(joined_node_msg)-sptr, " joined:");
2062  for (i=0; i< joined_list_entries; i++) {
2063  sptr += snprintf(joined_node_msg+sptr, sizeof(joined_node_msg)-sptr, " " CS_PRI_NODE_ID, joined_list_totemip[i]);
2064  }
2065  }
2066  else {
2067  joined_node_msg[0] = '\0';
2068  }
2069 
2070  if (instance->my_left_memb_entries) {
2071  int sptr = 0;
2072  int sptr2 = 0;
2073  sptr += snprintf(left_node_msg, sizeof(left_node_msg)-sptr, " left:");
2074  for (i=0; i< instance->my_left_memb_entries; i++) {
2075  sptr += snprintf(left_node_msg+sptr, sizeof(left_node_msg)-sptr, " " CS_PRI_NODE_ID, left_list[i]);
2076  }
2077  for (i=0; i< instance->my_left_memb_entries; i++) {
2078  if (my_leave_memb_match(instance, left_list[i]) == 0) {
2079  if (sptr2 == 0) {
2080  sptr2 += snprintf(failed_node_msg, sizeof(failed_node_msg)-sptr2, " failed:");
2081  }
2082  sptr2 += snprintf(failed_node_msg+sptr2, sizeof(failed_node_msg)-sptr2, " " CS_PRI_NODE_ID, left_list[i]);
2083  }
2084  }
2085  if (sptr2 == 0) {
2086  failed_node_msg[0] = '\0';
2087  }
2088  }
2089  else {
2090  left_node_msg[0] = '\0';
2091  failed_node_msg[0] = '\0';
2092  }
2093 
2094  my_leave_memb_clear(instance);
2095 
2097  "entering OPERATIONAL state.");
2099  "A new membership (" CS_PRI_RING_ID ") was formed. Members%s%s",
2100  instance->my_ring_id.rep,
2101  (uint64_t)instance->my_ring_id.seq,
2102  joined_node_msg,
2103  left_node_msg);
2104 
2105  if (strlen(failed_node_msg)) {
2107  "Failed to receive the leave message.%s",
2108  failed_node_msg);
2109  }
2110 
2111  instance->memb_state = MEMB_STATE_OPERATIONAL;
2112 
2113  instance->stats.operational_entered++;
2114  instance->stats.continuous_gather = 0;
2115 
2116  instance->my_received_flg = 1;
2117 
2118  reset_pause_timeout (instance);
2119 
2120  /*
2121  * Save ring id information from this configuration to determine
2122  * which processors are transitioning from old regular configuration
2123  * in to new regular configuration on the next configuration change
2124  */
2125  memcpy (&instance->my_old_ring_id, &instance->my_ring_id,
2126  sizeof (struct memb_ring_id));
2127 
2128  return;
2129 }
2130 
2131 static void memb_state_gather_enter (
2132  struct totemsrp_instance *instance,
2133  enum gather_state_from gather_from)
2134 {
2135  int32_t res;
2136 
2137  instance->orf_token_discard = 1;
2138 
2139  instance->originated_orf_token = 0;
2140 
2141  memb_set_merge (
2142  &instance->my_id, 1,
2143  instance->my_proc_list, &instance->my_proc_list_entries);
2144 
2145  memb_join_message_send (instance);
2146 
2147  /*
2148  * Restart the join timeout
2149  */
2150  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);
2151 
2152  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
2153  QB_LOOP_MED,
2154  instance->totem_config->join_timeout*QB_TIME_NS_IN_MSEC,
2155  (void *)instance,
2156  memb_timer_function_state_gather,
2157  &instance->memb_timer_state_gather_join_timeout);
2158  if (res != 0) {
2159  log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(1) : %d", res);
2160  }
2161 
2162  /*
2163  * Restart the consensus timeout
2164  */
2165  qb_loop_timer_del (instance->totemsrp_poll_handle,
2167 
2168  res = qb_loop_timer_add (instance->totemsrp_poll_handle,
2169  QB_LOOP_MED,
2170  instance->totem_config->consensus_timeout*QB_TIME_NS_IN_MSEC,
2171  (void *)instance,
2172  memb_timer_function_gather_consensus_timeout,
2173  &instance->memb_timer_state_gather_consensus_timeout);
2174  if (res != 0) {
2175  log_printf(instance->totemsrp_log_level_error, "memb_state_gather_enter - qb_loop_timer_add error(2) : %d", res);
2176  }
2177 
2178  /*
2179  * Cancel the token loss and token retransmission timeouts
2180  */
2181  cancel_token_retransmit_timeout (instance); // REVIEWED
2182  cancel_token_timeout (instance); // REVIEWED
2183  cancel_merge_detect_timeout (instance);
2184 
2185  memb_consensus_reset (instance);
2186 
2187  memb_consensus_set (instance, &instance->my_id);
2188 
2190  "entering GATHER state from %d(%s).",
2191  gather_from, gsfrom_to_msg(gather_from));
2192 
2193  instance->memb_state = MEMB_STATE_GATHER;
2194  instance->stats.gather_entered++;
2195 
2196  if (gather_from == TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED) {
2197  /*
2198  * State 3 means gather, so we are continuously gathering.
2199  */
2200  instance->stats.continuous_gather++;
2201  }
2202 
2203  return;
2204 }
2205 
2206 static void timer_function_token_retransmit_timeout (void *data);
2207 
2208 static void target_set_completed (
2209  void *context)
2210 {
2211  struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
2212 
2213  memb_state_commit_token_send (instance);
2214 
2215 }
2216 
2217 static void memb_state_commit_enter (
2218  struct totemsrp_instance *instance)
2219 {
2220  old_ring_state_save (instance);
2221 
2222  memb_state_commit_token_update (instance);
2223 
2224  memb_state_commit_token_target_set (instance);
2225 
2226  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_join_timeout);
2227 
2229 
2230  qb_loop_timer_del (instance->totemsrp_poll_handle, instance->memb_timer_state_gather_consensus_timeout);
2231 
2233 
2234  memb_ring_id_set (instance, &instance->commit_token->ring_id);
2235 
2236  instance->memb_ring_id_store (&instance->my_ring_id, instance->my_id.nodeid);
2237 
2238  instance->token_ring_id_seq = instance->my_ring_id.seq;
2239 
2241  "entering COMMIT state.");
2242 
2243  instance->memb_state = MEMB_STATE_COMMIT;
2244  reset_token_retransmit_timeout (instance); // REVIEWED
2245  reset_token_timeout (instance); // REVIEWED
2246 
2247  instance->stats.commit_entered++;
2248  instance->stats.continuous_gather = 0;
2249 
2250  /*
2251  * reset all flow control variables since we are starting a new ring
2252  */
2253  instance->my_trc = 0;
2254  instance->my_pbl = 0;
2255  instance->my_cbl = 0;
2256  /*
2257  * commit token sent after callback that token target has been set
2258  */
2259 }
2260 
2261 static void memb_state_recovery_enter (
2262  struct totemsrp_instance *instance,
2263  struct memb_commit_token *commit_token)
2264 {
2265  int i;
2266  int local_received_flg = 1;
2267  unsigned int low_ring_aru;
2268  unsigned int range = 0;
2269  unsigned int messages_originated = 0;
2270  const struct srp_addr *addr;
2271  struct memb_commit_token_memb_entry *memb_list;
2272  struct memb_ring_id my_new_memb_ring_id_list[PROCESSOR_COUNT_MAX];
2273 
2274  addr = (const struct srp_addr *)commit_token->end_of_commit_token;
2275  memb_list = (struct memb_commit_token_memb_entry *)(addr + commit_token->addr_entries);
2276 
2278  "entering RECOVERY state.");
2279 
2280  instance->orf_token_discard = 0;
2281 
2282  instance->my_high_ring_delivered = 0;
2283 
2284  sq_reinit (&instance->recovery_sort_queue, SEQNO_START_MSG);
2285  cs_queue_reinit (&instance->retrans_message_queue);
2286 
2287  low_ring_aru = instance->old_ring_state_high_seq_received;
2288 
2289  memb_state_commit_token_send_recovery (instance, commit_token);
2290 
2291  instance->my_token_seq = SEQNO_START_TOKEN - 1;
2292 
2293  /*
2294  * Build regular configuration
2295  */
2297  instance->totemnet_context,
2298  commit_token->addr_entries);
2299 
2300  /*
2301  * Build transitional configuration
2302  */
2303  for (i = 0; i < instance->my_new_memb_entries; i++) {
2304  memcpy (&my_new_memb_ring_id_list[i],
2305  &memb_list[i].ring_id,
2306  sizeof (struct memb_ring_id));
2307  }
2308  memb_set_and_with_ring_id (
2309  instance->my_new_memb_list,
2310  my_new_memb_ring_id_list,
2311  instance->my_new_memb_entries,
2312  instance->my_memb_list,
2313  instance->my_memb_entries,
2314  &instance->my_old_ring_id,
2315  instance->my_trans_memb_list,
2316  &instance->my_trans_memb_entries);
2317 
2318  for (i = 0; i < instance->my_trans_memb_entries; i++) {
2320  "TRANS [%d] member " CS_PRI_NODE_ID ":", i, instance->my_trans_memb_list[i].nodeid);
2321  }
2322  for (i = 0; i < instance->my_new_memb_entries; i++) {
2324  "position [%d] member " CS_PRI_NODE_ID ":", i, addr[i].nodeid);
2326  "previous ringid (" CS_PRI_RING_ID ")",
2327  memb_list[i].ring_id.rep, (uint64_t)memb_list[i].ring_id.seq);
2328 
2330  "aru %x high delivered %x received flag %d",
2331  memb_list[i].aru,
2332  memb_list[i].high_delivered,
2333  memb_list[i].received_flg);
2334 
2335  // assert (totemip_print (&memb_list[i].ring_id.rep) != 0);
2336  }
2337  /*
2338  * Determine if any received flag is false
2339  */
2340  for (i = 0; i < commit_token->addr_entries; i++) {
2341  if (memb_set_subset (&instance->my_new_memb_list[i], 1,
2342  instance->my_trans_memb_list, instance->my_trans_memb_entries) &&
2343 
2344  memb_list[i].received_flg == 0) {
2345  instance->my_deliver_memb_entries = instance->my_trans_memb_entries;
2346  memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list,
2347  sizeof (struct srp_addr) * instance->my_trans_memb_entries);
2348  local_received_flg = 0;
2349  break;
2350  }
2351  }
2352  if (local_received_flg == 1) {
2353  goto no_originate;
2354  } /* Else originate messages if we should */
2355 
2356  /*
2357  * Calculate my_low_ring_aru, instance->my_high_ring_delivered for the transitional membership
2358  */
2359  for (i = 0; i < commit_token->addr_entries; i++) {
2360  if (memb_set_subset (&instance->my_new_memb_list[i], 1,
2361  instance->my_deliver_memb_list,
2362  instance->my_deliver_memb_entries) &&
2363 
2364  memcmp (&instance->my_old_ring_id,
2365  &memb_list[i].ring_id,
2366  sizeof (struct memb_ring_id)) == 0) {
2367 
2368  if (sq_lt_compare (memb_list[i].aru, low_ring_aru)) {
2369 
2370  low_ring_aru = memb_list[i].aru;
2371  }
2372  if (sq_lt_compare (instance->my_high_ring_delivered, memb_list[i].high_delivered)) {
2373  instance->my_high_ring_delivered = memb_list[i].high_delivered;
2374  }
2375  }
2376  }
2377 
2378  /*
2379  * Copy all old ring messages to instance->retrans_message_queue
2380  */
2381  range = instance->old_ring_state_high_seq_received - low_ring_aru;
2382  if (range == 0) {
2383  /*
2384  * No messages to copy
2385  */
2386  goto no_originate;
2387  }
2388  assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);
2389 
2391  "copying all old ring messages from %x-%x.",
2392  low_ring_aru + 1, instance->old_ring_state_high_seq_received);
2393 
2394  for (i = 1; i <= range; i++) {
2395  struct sort_queue_item *sort_queue_item;
2396  struct message_item message_item;
2397  void *ptr;
2398  int res;
2399 
2400  res = sq_item_get (&instance->regular_sort_queue,
2401  low_ring_aru + i, &ptr);
2402  if (res != 0) {
2403  continue;
2404  }
2405  sort_queue_item = ptr;
2406  messages_originated++;
2407  memset (&message_item, 0, sizeof (struct message_item));
2408  // TODO LEAK
2409  message_item.mcast = totemsrp_buffer_alloc (instance);
2410  assert (message_item.mcast);
2411  memset(message_item.mcast, 0, sizeof (struct mcast));
2415  message_item.mcast->system_from = instance->my_id;
2417 
2418  message_item.mcast->header.nodeid = instance->my_id.nodeid;
2419  assert (message_item.mcast->header.nodeid);
2420  memcpy (&message_item.mcast->ring_id, &instance->my_ring_id,
2421  sizeof (struct memb_ring_id));
2422  message_item.msg_len = sort_queue_item->msg_len + sizeof (struct mcast);
2423  memcpy (((char *)message_item.mcast) + sizeof (struct mcast),
2426  cs_queue_item_add (&instance->retrans_message_queue, &message_item);
2427  }
2429  "Originated %d messages in RECOVERY.", messages_originated);
2430  goto originated;
2431 
2432 no_originate:
2434  "Did not need to originate any messages in recovery.");
2435 
2436 originated:
2437  instance->my_aru = SEQNO_START_MSG;
2438  instance->my_aru_count = 0;
2439  instance->my_seq_unchanged = 0;
2441  instance->my_install_seq = SEQNO_START_MSG;
2442  instance->last_released = SEQNO_START_MSG;
2443 
2444  reset_token_timeout (instance); // REVIEWED
2445  reset_token_retransmit_timeout (instance); // REVIEWED
2446 
2447  instance->memb_state = MEMB_STATE_RECOVERY;
2448  instance->stats.recovery_entered++;
2449  instance->stats.continuous_gather = 0;
2450 
2451  return;
2452 }
2453 
2454 void totemsrp_event_signal (void *srp_context, enum totem_event_type type, int value)
2455 {
2456  struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
2457 
2458  token_hold_cancel_send (instance);
2459 
2460  return;
2461 }
2462 
2463 int totemsrp_mcast (
2464  void *srp_context,
2465  struct iovec *iovec,
2466  unsigned int iov_len,
2467  int guarantee)
2468 {
2469  struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
2470  int i;
2471  struct message_item message_item;
2472  char *addr;
2473  unsigned int addr_idx;
2474  struct cs_queue *queue_use;
2475 
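 /*
  * While a transitional configuration is still waiting to be acknowledged
  * (waiting_trans_ack), newly originated messages are staged in a separate
  * queue so traffic from before and after the configuration change is kept
  * apart.
  */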
2476  if (instance->waiting_trans_ack) {
2477  queue_use = &instance->new_message_queue_trans;
2478  } else {
2479  queue_use = &instance->new_message_queue;
2480  }
2481 
2482  if (cs_queue_is_full (queue_use)) {
2483  log_printf (instance->totemsrp_log_level_debug, "queue full");
2484  return (-1);
2485  }
2486 
2487  memset (&message_item, 0, sizeof (struct message_item));
2488 
2489  /*
2490  * Allocate pending item
2491  */
2492  message_item.mcast = totemsrp_buffer_alloc (instance);
2493  if (message_item.mcast == 0) {
2494  goto error_mcast;
2495  }
2496 
2497  /*
2498  * Set mcast header
2499  */
2500  memset(message_item.mcast, 0, sizeof (struct mcast));
2505 
2506  message_item.mcast->header.nodeid = instance->my_id.nodeid;
2507  assert (message_item.mcast->header.nodeid);
2508 
2510  message_item.mcast->system_from = instance->my_id;
2511 
2512  addr = (char *)message_item.mcast;
2513  addr_idx = sizeof (struct mcast);
2514  for (i = 0; i < iov_len; i++) {
2515  memcpy (&addr[addr_idx], iovec[i].iov_base, iovec[i].iov_len);
2516  addr_idx += iovec[i].iov_len;
2517  }
2518 
2519  message_item.msg_len = addr_idx;
2520 
2521  log_printf (instance->totemsrp_log_level_trace, "mcasted message added to pending queue");
2522  instance->stats.mcast_tx++;
2523  cs_queue_item_add (queue_use, &message_item);
2524 
2525  return (0);
2526 
2527 error_mcast:
2528  return (-1);
2529 }
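
/*
 * Editor's note -- illustrative usage sketch only, not part of the original
 * source. A caller (totempg in practice) fills one or more iovecs with its
 * payload and hands them to totemsrp_mcast; srp_context, payload and
 * payload_len are hypothetical names here and guarantee is whatever delivery
 * guarantee the caller requests:
 *
 *	struct iovec iov = { .iov_base = payload, .iov_len = payload_len };
 *
 *	if (totemsrp_mcast (srp_context, &iov, 1, guarantee) != 0) {
 *		... the pending queue is full; retry once totemsrp_avail ()
 *		    reports free space ...
 *	}
 */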
2530 
2531 /*
2532  * Determine if there is room to queue a new message
2533  */
2534 int totemsrp_avail (void *srp_context)
2535 {
2536  struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
2537  int avail;
2538  struct cs_queue *queue_use;
2539 
2540  if (instance->waiting_trans_ack) {
2541  queue_use = &instance->new_message_queue_trans;
2542  } else {
2543  queue_use = &instance->new_message_queue;
2544  }
2545  cs_queue_avail (queue_use, &avail);
2546 
2547  return (avail);
2548 }
2549 
2550 /*
2551  * ORF Token Management
2552  */
2553 /*
2554  * Recast message to mcast group if it is available
2555  */
2556 static int orf_token_remcast (
2557  struct totemsrp_instance *instance,
2558  int seq)
2559 {
2560  struct sort_queue_item *sort_queue_item;
2561  int res;
2562  void *ptr;
2563 
2564  struct sq *sort_queue;
2565 
2566  if (instance->memb_state == MEMB_STATE_RECOVERY) {
2567  sort_queue = &instance->recovery_sort_queue;
2568  } else {
2569  sort_queue = &instance->regular_sort_queue;
2570  }
2571 
2572  res = sq_in_range (sort_queue, seq);
2573  if (res == 0) {
2574  log_printf (instance->totemsrp_log_level_debug, "sq not in range");
2575  return (-1);
2576  }
2577 
2578  /*
2579  * Get RTR item at seq, if not available, return
2580  */
2581  res = sq_item_get (sort_queue, seq, &ptr);
2582  if (res != 0) {
2583  return -1;
2584  }
2585 
2586  sort_queue_item = ptr;
2587 
2589  instance->totemnet_context,
2592 
2593  return (0);
2594 }
2595 
2596 
2597 /*
2598  * Free all freeable messages from ring
2599  */
2600 static void messages_free (
2601  struct totemsrp_instance *instance,
2602  unsigned int token_aru)
2603 {
2604  struct sort_queue_item *regular_message;
2605  unsigned int i;
2606  int res;
2607  int log_release = 0;
2608  unsigned int release_to;
2609  unsigned int range = 0;
2610 
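 /*
  * A message may be freed only once it is known to have been received by
  * every processor on the ring and delivered locally, so release_to is the
  * minimum of the token aru, the last aru seen on the token and our own
  * high delivered marker.
  */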
2611  release_to = token_aru;
2612  if (sq_lt_compare (instance->my_last_aru, release_to)) {
2613  release_to = instance->my_last_aru;
2614  }
2615  if (sq_lt_compare (instance->my_high_delivered, release_to)) {
2616  release_to = instance->my_high_delivered;
2617  }
2618 
2619  /*
2620  * Ensure we don't try to release before an already released point
2621  */
2622  if (sq_lt_compare (release_to, instance->last_released)) {
2623  return;
2624  }
2625 
2626  range = release_to - instance->last_released;
2627  assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);
2628 
2629  /*
2630  * Release retransmit list items if group aru indicates they are transmitted
2631  */
2632  for (i = 1; i <= range; i++) {
2633  void *ptr;
2634 
2635  res = sq_item_get (&instance->regular_sort_queue,
2636  instance->last_released + i, &ptr);
2637  if (res == 0) {
2638  regular_message = ptr;
2639  totemsrp_buffer_release (instance, regular_message->mcast);
2640  }
2641  sq_items_release (&instance->regular_sort_queue,
2642  instance->last_released + i);
2643 
2644  log_release = 1;
2645  }
2646  instance->last_released += range;
2647 
2648  if (log_release) {
2650  "releasing messages up to and including %x", release_to);
2651  }
2652 }
2653 
2654 static void update_aru (
2655  struct totemsrp_instance *instance)
2656 {
2657  unsigned int i;
2658  int res;
2659  struct sq *sort_queue;
2660  unsigned int range;
2661  unsigned int my_aru_saved = 0;
2662 
2663  if (instance->memb_state == MEMB_STATE_RECOVERY) {
2664  sort_queue = &instance->recovery_sort_queue;
2665  } else {
2666  sort_queue = &instance->regular_sort_queue;
2667  }
2668 
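 /*
  * my_aru (all received up to) is advanced across any newly contiguous
  * entries in the sort queue and stops at the first hole.
  */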
2669  range = instance->my_high_seq_received - instance->my_aru;
2670 
2671  my_aru_saved = instance->my_aru;
2672  for (i = 1; i <= range; i++) {
2673 
2674  void *ptr;
2675 
2676  res = sq_item_get (sort_queue, my_aru_saved + i, &ptr);
2677  /*
2678  * If hole, stop updating aru
2679  */
2680  if (res != 0) {
2681  break;
2682  }
2683  }
2684  instance->my_aru += i - 1;
2685 }
2686 
2687 /*
2688  * Multicasts pending messages onto the ring (requires orf_token possession)
2689  */
2690 static int orf_token_mcast (
2691  struct totemsrp_instance *instance,
2692  struct orf_token *token,
2693  int fcc_mcasts_allowed)
2694 {
2695  struct message_item *message_item = 0;
2696  struct cs_queue *mcast_queue;
2697  struct sq *sort_queue;
2698  struct sort_queue_item sort_queue_item;
2699  struct mcast *mcast;
2700  unsigned int fcc_mcast_current;
2701 
2702  if (instance->memb_state == MEMB_STATE_RECOVERY) {
2703  mcast_queue = &instance->retrans_message_queue;
2704  sort_queue = &instance->recovery_sort_queue;
2705  reset_token_retransmit_timeout (instance); // REVIEWED
2706  } else {
2707  if (instance->waiting_trans_ack) {
2708  mcast_queue = &instance->new_message_queue_trans;
2709  } else {
2710  mcast_queue = &instance->new_message_queue;
2711  }
2712 
2713  sort_queue = &instance->regular_sort_queue;
2714  }
2715 
2716  for (fcc_mcast_current = 0; fcc_mcast_current < fcc_mcasts_allowed; fcc_mcast_current++) {
2717  if (cs_queue_is_empty (mcast_queue)) {
2718  break;
2719  }
2720  message_item = (struct message_item *)cs_queue_item_get (mcast_queue);
2721 
2722  message_item->mcast->seq = ++token->seq;
2723  message_item->mcast->this_seqno = instance->global_seqno++;
2724 
2725  /*
2726  * Build IO vector
2727  */
2728  memset (&sort_queue_item, 0, sizeof (struct sort_queue_item));
2731 
2733 
2734  memcpy (&mcast->ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));
2735 
2736  /*
2737  * Add message to retransmit queue
2738  */
2739  sq_item_add (sort_queue, &sort_queue_item, message_item->mcast->seq);
2740 
2742  instance->totemnet_context,
2745 
2746  /*
2747  * Delete item from pending queue
2748  */
2749  cs_queue_item_remove (mcast_queue);
2750 
2751  /*
2752  * If messages mcasted, deliver any new messages to totempg
2753  */
2754  instance->my_high_seq_received = token->seq;
2755  }
2756 
2757  update_aru (instance);
2758 
2759  /*
2760  * Return the number of messages multicast; a nonzero value tells single
2761  * node clusters that more messages may still be pending
2761  */
2762  return (fcc_mcast_current);
2763 }
2764 
2765 /*
2766  * Remulticasts messages in orf_token's retransmit list (requires orf_token)
2767  * Modifies orf_token's rtr list to include retransmits required by this process
2768  */
2769 static int orf_token_rtr (
2770  struct totemsrp_instance *instance,
2771  struct orf_token *orf_token,
2772  unsigned int *fcc_allowed)
2773 {
2774  unsigned int res;
2775  unsigned int i, j;
2776  unsigned int found;
2777  struct sq *sort_queue;
2778  struct rtr_item *rtr_list;
2779  unsigned int range = 0;
2780  char retransmit_msg[1024];
2781  char value[64];
2782 
2783  if (instance->memb_state == MEMB_STATE_RECOVERY) {
2784  sort_queue = &instance->recovery_sort_queue;
2785  } else {
2786  sort_queue = &instance->regular_sort_queue;
2787  }
2788 
2789  rtr_list = &orf_token->rtr_list[0];
2790 
2791  strcpy (retransmit_msg, "Retransmit List: ");
2792  if (orf_token->rtr_list_entries) {
2794  "Retransmit List %d", orf_token->rtr_list_entries);
2795  for (i = 0; i < orf_token->rtr_list_entries; i++) {
2796  sprintf (value, "%x ", rtr_list[i].seq);
2797  strcat (retransmit_msg, value);
2798  }
2799  strcat (retransmit_msg, "");
2801  "%s", retransmit_msg);
2802  }
2803 
2804  /*
2805  * Retransmit messages on orf_token's RTR list from RTR queue
2806  */
2807  for (instance->fcc_remcast_current = 0, i = 0;
2808  instance->fcc_remcast_current < *fcc_allowed && i < orf_token->rtr_list_entries;) {
2809 
2810  /*
2811  * If this retransmit request isn't from this configuration,
2812  * try next rtr entry
2813  */
2814  if (memcmp (&rtr_list[i].ring_id, &instance->my_ring_id,
2815  sizeof (struct memb_ring_id)) != 0) {
2816 
2817  i += 1;
2818  continue;
2819  }
2820 
2821  res = orf_token_remcast (instance, rtr_list[i].seq);
2822  if (res == 0) {
2823  /*
2824  * Multicasted message, so no need to copy to new retransmit list
2825  */
2826  orf_token->rtr_list_entries -= 1;
2827  assert (orf_token->rtr_list_entries >= 0);
2828  memmove (&rtr_list[i], &rtr_list[i + 1],
2829  sizeof (struct rtr_item) * (orf_token->rtr_list_entries - i));
2830 
2831  instance->stats.mcast_retx++;
2832  instance->fcc_remcast_current++;
2833  } else {
2834  i += 1;
2835  }
2836  }
2837  *fcc_allowed = *fcc_allowed - instance->fcc_remcast_current;
2838 
2839  /*
2840  * Add messages to retransmit to RTR list
2841  * but only retry if there is room in the retransmit list
2842  */
2843 
2844  range = orf_token->seq - instance->my_aru;
2845  assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);
2846 
2847  for (i = 1; (orf_token->rtr_list_entries < RETRANSMIT_ENTRIES_MAX) &&
2848  (i <= range); i++) {
2849 
2850  /*
2851  * Ensure message is within the sort queue range
2852  */
2853  res = sq_in_range (sort_queue, instance->my_aru + i);
2854  if (res == 0) {
2855  break;
2856  }
2857 
2858  /*
2859  * Find if a message is missing from this processor
2860  */
2861  res = sq_item_inuse (sort_queue, instance->my_aru + i);
2862  if (res == 0) {
2863  /*
2864  * Determine how many times we have missed receiving
2865  * this sequence number. sq_item_miss_count increments
2866  * a counter for the sequence number. The miss count
2867  * will be returned and compared. This allows time for
2868  * delayed multicast messages to be received before
2869  * declaring the message is missing and requesting a
2870  * retransmit.
2871  */
2872  res = sq_item_miss_count (sort_queue, instance->my_aru + i);
2873  if (res < instance->totem_config->miss_count_const) {
2874  continue;
2875  }
2876 
2877  /*
2878  * Determine if missing message is already in retransmit list
2879  */
2880  found = 0;
2881  for (j = 0; j < orf_token->rtr_list_entries; j++) {
2882  if (instance->my_aru + i == rtr_list[j].seq) {
2883  found = 1;
2884  }
2885  }
2886  if (found == 0) {
2887  /*
2888  * Missing message not found in current retransmit list so add it
2889  */
2890  memcpy (&rtr_list[orf_token->rtr_list_entries].ring_id,
2891  &instance->my_ring_id, sizeof (struct memb_ring_id));
2892  rtr_list[orf_token->rtr_list_entries].seq = instance->my_aru + i;
2893  orf_token->rtr_list_entries += 1;
2894  }
2895  }
2896  }
2897  return (instance->fcc_remcast_current);
2898 }
2899 
2900 static void token_retransmit (struct totemsrp_instance *instance)
2901 {
2902  totemnet_token_send (instance->totemnet_context,
2903  instance->orf_token_retransmit,
2904  instance->orf_token_retransmit_size);
2905 }
2906 
2907 /*
2908  * Retransmit the regular token to the next processor if no mcast
2909  * or token has been received within the token retransmit timeout
2910  * period
2911  */
2912 static void timer_function_token_retransmit_timeout (void *data)
2913 {
2914  struct totemsrp_instance *instance = data;
2915 
2916  switch (instance->memb_state) {
2917  case MEMB_STATE_GATHER:
2918  break;
2919  case MEMB_STATE_COMMIT:
2921  case MEMB_STATE_RECOVERY:
2922  token_retransmit (instance);
2923  reset_token_retransmit_timeout (instance); // REVIEWED
2924  break;
2925  }
2926 }
2927 
2928 static void timer_function_token_hold_retransmit_timeout (void *data)
2929 {
2930  struct totemsrp_instance *instance = data;
2931 
2932  switch (instance->memb_state) {
2933  case MEMB_STATE_GATHER:
2934  break;
2935  case MEMB_STATE_COMMIT:
2936  break;
2938  case MEMB_STATE_RECOVERY:
2939  token_retransmit (instance);
2940  break;
2941  }
2942 }
2943 
2944 static void timer_function_merge_detect_timeout(void *data)
2945 {
2946  struct totemsrp_instance *instance = data;
2947 
2949 
2950  switch (instance->memb_state) {
2952  if (instance->my_ring_id.rep == instance->my_id.nodeid) {
2953  memb_merge_detect_transmit (instance);
2954  }
2955  break;
2956  case MEMB_STATE_GATHER:
2957  case MEMB_STATE_COMMIT:
2958  case MEMB_STATE_RECOVERY:
2959  break;
2960  }
2961 }
2962 
2963 /*
2964  * Send orf_token to next member (requires orf_token)
2965  */
2966 static int token_send (
2967  struct totemsrp_instance *instance,
2968  struct orf_token *orf_token,
2969  int forward_token)
2970 {
2971  int res = 0;
2972  unsigned int orf_token_size;
2973 
2974  orf_token_size = sizeof (struct orf_token) +
2975  (orf_token->rtr_list_entries * sizeof (struct rtr_item));
2976 
2977  orf_token->header.nodeid = instance->my_id.nodeid;
2978  memcpy (instance->orf_token_retransmit, orf_token, orf_token_size);
2979  instance->orf_token_retransmit_size = orf_token_size;
2980  assert (orf_token->header.nodeid);
2981 
2982  if (forward_token == 0) {
2983  return (0);
2984  }
2985 
2987  orf_token,
2988  orf_token_size);
2989 
2990  return (res);
2991 }
2992 
2993 static int token_hold_cancel_send (struct totemsrp_instance *instance)
2994 {
2995  struct token_hold_cancel token_hold_cancel;
2996 
2997  /*
2998  * Only cancel if the token is currently held
2999  */
3000  if (instance->my_token_held == 0) {
3001  return (0);
3002  }
3003  instance->my_token_held = 0;
3004 
3005  /*
3006  * Build message
3007  */
3013  memcpy (&token_hold_cancel.ring_id, &instance->my_ring_id,
3014  sizeof (struct memb_ring_id));
3015  assert (token_hold_cancel.header.nodeid);
3016 
3017  instance->stats.token_hold_cancel_tx++;
3018 
3020  sizeof (struct token_hold_cancel));
3021 
3022  return (0);
3023 }
3024 
3025 static int orf_token_send_initial (struct totemsrp_instance *instance)
3026 {
3027  struct orf_token orf_token;
3028  int res;
3029 
3034  orf_token.header.nodeid = instance->my_id.nodeid;
3035  assert (orf_token.header.nodeid);
3038  orf_token.retrans_flg = 1;
3039  instance->my_set_retrans_flg = 1;
3040  instance->stats.orf_token_tx++;
3041 
3042  if (cs_queue_is_empty (&instance->retrans_message_queue) == 1) {
3043  orf_token.retrans_flg = 0;
3044  instance->my_set_retrans_flg = 0;
3045  } else {
3046  orf_token.retrans_flg = 1;
3047  instance->my_set_retrans_flg = 1;
3048  }
3049 
3050  orf_token.aru = 0;
3052  orf_token.aru_addr = instance->my_id.nodeid;
3053 
3054  memcpy (&orf_token.ring_id, &instance->my_ring_id, sizeof (struct memb_ring_id));
3055  orf_token.fcc = 0;
3056  orf_token.backlog = 0;
3057 
3059 
3060  res = token_send (instance, &orf_token, 1);
3061 
3062  return (res);
3063 }
3064 
3065 static void memb_state_commit_token_update (
3066  struct totemsrp_instance *instance)
3067 {
3068  struct srp_addr *addr;
3069  struct memb_commit_token_memb_entry *memb_list;
3070  unsigned int high_aru;
3071  unsigned int i;
3072 
3073  addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
3074  memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);
3075 
3076  memcpy (instance->my_new_memb_list, addr,
3077  sizeof (struct srp_addr) * instance->commit_token->addr_entries);
3078 
3079  instance->my_new_memb_entries = instance->commit_token->addr_entries;
3080 
3081  memcpy (&memb_list[instance->commit_token->memb_index].ring_id,
3082  &instance->my_old_ring_id, sizeof (struct memb_ring_id));
3083 
3084  memb_list[instance->commit_token->memb_index].aru = instance->old_ring_state_aru;
3085  /*
3086  * TODO high delivered is really instance->my_aru, but with safe this
3087  * could change?
3088  */
3089  instance->my_received_flg =
3090  (instance->my_aru == instance->my_high_seq_received);
3091 
3092  memb_list[instance->commit_token->memb_index].received_flg = instance->my_received_flg;
3093 
3094  memb_list[instance->commit_token->memb_index].high_delivered = instance->my_high_delivered;
3095  /*
3096  * find high aru up to current memb_index for all matching ring ids
3097  * if any ring id matching memb_index has aru less than high aru, set
3098  * received flag for that entry to false
3099  */
3100  high_aru = memb_list[instance->commit_token->memb_index].aru;
3101  for (i = 0; i <= instance->commit_token->memb_index; i++) {
3102  if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id,
3103  &memb_list[i].ring_id,
3104  sizeof (struct memb_ring_id)) == 0) {
3105 
3106  if (sq_lt_compare (high_aru, memb_list[i].aru)) {
3107  high_aru = memb_list[i].aru;
3108  }
3109  }
3110  }
3111 
3112  for (i = 0; i <= instance->commit_token->memb_index; i++) {
3113  if (memcmp (&memb_list[instance->commit_token->memb_index].ring_id,
3114  &memb_list[i].ring_id,
3115  sizeof (struct memb_ring_id)) == 0) {
3116 
3117  if (sq_lt_compare (memb_list[i].aru, high_aru)) {
3118  memb_list[i].received_flg = 0;
3119  if (i == instance->commit_token->memb_index) {
3120  instance->my_received_flg = 0;
3121  }
3122  }
3123  }
3124  }
3125 
3126  instance->commit_token->header.nodeid = instance->my_id.nodeid;
3127  instance->commit_token->memb_index += 1;
3128  assert (instance->commit_token->memb_index <= instance->commit_token->addr_entries);
3129  assert (instance->commit_token->header.nodeid);
3130 }
3131 
3132 static void memb_state_commit_token_target_set (
3133  struct totemsrp_instance *instance)
3134 {
3135  struct srp_addr *addr;
3136 
3137  addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
3138 
3139  /* Totemnet just looks at the node id */
3141  instance->totemnet_context,
3142  addr[instance->commit_token->memb_index %
3143  instance->commit_token->addr_entries].nodeid);
3144 }
3145 
3146 static int memb_state_commit_token_send_recovery (
3147  struct totemsrp_instance *instance,
3148  struct memb_commit_token *commit_token)
3149 {
3150  unsigned int commit_token_size;
3151 
3152  commit_token->token_seq++;
3153  commit_token->header.nodeid = instance->my_id.nodeid;
3154  commit_token_size = sizeof (struct memb_commit_token) +
3155  ((sizeof (struct srp_addr) +
3156  sizeof (struct memb_commit_token_memb_entry)) * commit_token->addr_entries);
3157  /*
3158  * Make a copy for retransmission if necessary
3159  */
3160  memcpy (instance->orf_token_retransmit, commit_token, commit_token_size);
3161  instance->orf_token_retransmit_size = commit_token_size;
3162 
3163  instance->stats.memb_commit_token_tx++;
3164 
3166  commit_token,
3167  commit_token_size);
3168 
3169  /*
3170  * Request retransmission of the commit token in case it is lost
3171  */
3172  reset_token_retransmit_timeout (instance);
3173  return (0);
3174 }
3175 
3176 static int memb_state_commit_token_send (
3177  struct totemsrp_instance *instance)
3178 {
3179  unsigned int commit_token_size;
3180 
3181  instance->commit_token->token_seq++;
3182  instance->commit_token->header.nodeid = instance->my_id.nodeid;
3183  commit_token_size = sizeof (struct memb_commit_token) +
3184  ((sizeof (struct srp_addr) +
3185  sizeof (struct memb_commit_token_memb_entry)) * instance->commit_token->addr_entries);
3186  /*
3187  * Make a copy for retransmission if necessary
3188  */
3189  memcpy (instance->orf_token_retransmit, instance->commit_token, commit_token_size);
3190  instance->orf_token_retransmit_size = commit_token_size;
3191 
3192  instance->stats.memb_commit_token_tx++;
3193 
3195  instance->commit_token,
3196  commit_token_size);
3197 
3198  /*
3199  * Request retransmission of the commit token in case it is lost
3200  */
3201  reset_token_retransmit_timeout (instance);
3202  return (0);
3203 }
3204 
3205 
3206 static int memb_lowest_in_config (struct totemsrp_instance *instance)
3207 {
3208  struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
3209  int token_memb_entries = 0;
3210  int i;
3211  unsigned int lowest_nodeid;
3212 
3213  memb_set_subtract (token_memb, &token_memb_entries,
3214  instance->my_proc_list, instance->my_proc_list_entries,
3215  instance->my_failed_list, instance->my_failed_list_entries);
3216 
3217  /*
3218  * find representative by searching for smallest identifier
3219  */
3220  assert(token_memb_entries > 0);
3221 
3222  lowest_nodeid = token_memb[0].nodeid;
3223  for (i = 1; i < token_memb_entries; i++) {
3224  if (lowest_nodeid > token_memb[i].nodeid) {
3225  lowest_nodeid = token_memb[i].nodeid;
3226  }
3227  }
3228  return (lowest_nodeid == instance->my_id.nodeid);
3229 }
3230 
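/*
 * qsort() comparator ordering processors by node id so that every
 * representative lays out the commit token member list in the same order.
 */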
3231 static int srp_addr_compare (const void *a, const void *b)
3232 {
3233  const struct srp_addr *srp_a = (const struct srp_addr *)a;
3234  const struct srp_addr *srp_b = (const struct srp_addr *)b;
3235 
3236  if (srp_a->nodeid < srp_b->nodeid) {
3237  return -1;
3238  } else if (srp_a->nodeid > srp_b->nodeid) {
3239  return 1;
3240  } else {
3241  return 0;
3242  }
3243 }
3244 
3245 static void memb_state_commit_token_create (
3246  struct totemsrp_instance *instance)
3247 {
3248  struct srp_addr token_memb[PROCESSOR_COUNT_MAX];
3249  struct srp_addr *addr;
3250  struct memb_commit_token_memb_entry *memb_list;
3251  int token_memb_entries = 0;
3252 
3254  "Creating commit token because I am the rep.");
3255 
3256  memb_set_subtract (token_memb, &token_memb_entries,
3257  instance->my_proc_list, instance->my_proc_list_entries,
3258  instance->my_failed_list, instance->my_failed_list_entries);
3259 
3260  memset (instance->commit_token, 0, sizeof (struct memb_commit_token));
3261  instance->commit_token->header.magic = TOTEM_MH_MAGIC;
3264  instance->commit_token->header.encapsulated = 0;
3265  instance->commit_token->header.nodeid = instance->my_id.nodeid;
3266  assert (instance->commit_token->header.nodeid);
3267 
3268  instance->commit_token->ring_id.rep = instance->my_id.nodeid;
3269  instance->commit_token->ring_id.seq = instance->token_ring_id_seq + 4;
3270 
3271  /*
3272  * This qsort is necessary to ensure the commit token traverses
3273  * the ring in the proper order
3274  */
3275  qsort (token_memb, token_memb_entries, sizeof (struct srp_addr),
3276  srp_addr_compare);
3277 
3278  instance->commit_token->memb_index = 0;
3279  instance->commit_token->addr_entries = token_memb_entries;
3280 
3281  addr = (struct srp_addr *)instance->commit_token->end_of_commit_token;
3282  memb_list = (struct memb_commit_token_memb_entry *)(addr + instance->commit_token->addr_entries);
3283 
3284  memcpy (addr, token_memb,
3285  token_memb_entries * sizeof (struct srp_addr));
3286  memset (memb_list, 0,
3287  sizeof (struct memb_commit_token_memb_entry) * token_memb_entries);
3288 }
3289 
3290 static void memb_join_message_send (struct totemsrp_instance *instance)
3291 {
3292  char memb_join_data[40000];
3293  struct memb_join *memb_join = (struct memb_join *)memb_join_data;
3294  char *addr;
3295  unsigned int addr_idx;
3296  size_t msg_len;
3297 
3302  memb_join->header.nodeid = instance->my_id.nodeid;
3303  assert (memb_join->header.nodeid);
3304 
3305  msg_len = sizeof(struct memb_join) +
3306  ((instance->my_proc_list_entries + instance->my_failed_list_entries) * sizeof(struct srp_addr));
3307 
3308  if (msg_len > sizeof(memb_join_data)) {
3310  "memb_join_message too long. Ignoring message.");
3311 
3312  return ;
3313  }
3314 
3315  memb_join->ring_seq = instance->my_ring_id.seq;
3318  memb_join->system_from = instance->my_id;
3319 
3320  /*
3321  * This mess adds the joined and failed processor lists into the join
3322  * message
3323  */
3324  addr = (char *)memb_join;
3325  addr_idx = sizeof (struct memb_join);
3326  memcpy (&addr[addr_idx],
3327  instance->my_proc_list,
3328  instance->my_proc_list_entries *
3329  sizeof (struct srp_addr));
3330  addr_idx +=
3331  instance->my_proc_list_entries *
3332  sizeof (struct srp_addr);
3333  memcpy (&addr[addr_idx],
3334  instance->my_failed_list,
3335  instance->my_failed_list_entries *
3336  sizeof (struct srp_addr));
3337  addr_idx +=
3338  instance->my_failed_list_entries *
3339  sizeof (struct srp_addr);
3340 
3341  if (instance->totem_config->send_join_timeout) {
3342  usleep (random() % (instance->totem_config->send_join_timeout * 1000));
3343  }
3344 
3345  instance->stats.memb_join_tx++;
3346 
3348  instance->totemnet_context,
3349  memb_join,
3350  addr_idx);
3351 }
3352 
3353 static void memb_leave_message_send (struct totemsrp_instance *instance)
3354 {
3355  char memb_join_data[40000];
3356  struct memb_join *memb_join = (struct memb_join *)memb_join_data;
3357  char *addr;
3358  unsigned int addr_idx;
3359  int active_memb_entries;
3360  struct srp_addr active_memb[PROCESSOR_COUNT_MAX];
3361  size_t msg_len;
3362 
3364  "sending join/leave message");
3365 
3366  /*
3367  * add us to the failed list, and remove us from
3368  * the members list
3369  */
3370  memb_set_merge(
3371  &instance->my_id, 1,
3372  instance->my_failed_list, &instance->my_failed_list_entries);
3373 
3374  memb_set_subtract (active_memb, &active_memb_entries,
3375  instance->my_proc_list, instance->my_proc_list_entries,
3376  &instance->my_id, 1);
3377 
3378  msg_len = sizeof(struct memb_join) +
3379  ((active_memb_entries + instance->my_failed_list_entries) * sizeof(struct srp_addr));
3380 
3381  if (msg_len > sizeof(memb_join_data)) {
3383  "memb_leave message too long. Ignoring message.");
3384 
3385  return ;
3386  }
3387 
3393 
3394  memb_join->ring_seq = instance->my_ring_id.seq;
3395  memb_join->proc_list_entries = active_memb_entries;
3397  memb_join->system_from = instance->my_id;
3398 
3399  // TODO: CC Maybe use the actual join send routine.
3400  /*
3401  * This mess adds the joined and failed processor lists into the join
3402  * message
3403  */
3404  addr = (char *)memb_join;
3405  addr_idx = sizeof (struct memb_join);
3406  memcpy (&addr[addr_idx],
3407  active_memb,
3408  active_memb_entries *
3409  sizeof (struct srp_addr));
3410  addr_idx +=
3411  active_memb_entries *
3412  sizeof (struct srp_addr);
3413  memcpy (&addr[addr_idx],
3414  instance->my_failed_list,
3415  instance->my_failed_list_entries *
3416  sizeof (struct srp_addr));
3417  addr_idx +=
3418  instance->my_failed_list_entries *
3419  sizeof (struct srp_addr);
3420 
3421 
3422  if (instance->totem_config->send_join_timeout) {
3423  usleep (random() % (instance->totem_config->send_join_timeout * 1000));
3424  }
3425  instance->stats.memb_join_tx++;
3426 
3428  instance->totemnet_context,
3429  memb_join,
3430  addr_idx);
3431 }
3432 
3433 static void memb_merge_detect_transmit (struct totemsrp_instance *instance)
3434 {
3435  struct memb_merge_detect memb_merge_detect;
3436 
3442  memb_merge_detect.system_from = instance->my_id;
3443  memcpy (&memb_merge_detect.ring_id, &instance->my_ring_id,
3444  sizeof (struct memb_ring_id));
3445  assert (memb_merge_detect.header.nodeid);
3446 
3447  instance->stats.memb_merge_detect_tx++;
3450  sizeof (struct memb_merge_detect));
3451 }
3452 
3453 static void memb_ring_id_set (
3454  struct totemsrp_instance *instance,
3455  const struct memb_ring_id *ring_id)
3456 {
3457 
3458  memcpy (&instance->my_ring_id, ring_id, sizeof (struct memb_ring_id));
3459 }
3460 
3461 int totemsrp_callback_token_create (
3462  void *srp_context,
3463  void **handle_out,
3464  enum totem_callback_token_type type,
3465  int delete,
3466  int (*callback_fn) (enum totem_callback_token_type type, const void *),
3467  const void *data)
3468 {
3469  struct totemsrp_instance *instance = (struct totemsrp_instance *)srp_context;
3470  struct token_callback_instance *callback_handle;
3471 
3472  token_hold_cancel_send (instance);
3473 
3474  callback_handle = malloc (sizeof (struct token_callback_instance));
3475  if (callback_handle == 0) {
3476  return (-1);
3477  }
3478  *handle_out = (void *)callback_handle;
3479  qb_list_init (&callback_handle->list);
3480  callback_handle->callback_fn = callback_fn;
3481  callback_handle->data = (void *) data;
3482  callback_handle->callback_type = type;
3483  callback_handle->delete = delete;
3484  switch (type) {
3486  qb_list_add (&callback_handle->list, &instance->token_callback_received_listhead);
3487  break;
3489  qb_list_add (&callback_handle->list, &instance->token_callback_sent_listhead);
3490  break;
3491  }
3492 
3493  return (0);
3494 }
3495 
3496 void totemsrp_callback_token_destroy (void *srp_context, void **handle_out)
3497 {
3498  struct token_callback_instance *h;
3499 
3500  if (*handle_out) {
3501  h = (struct token_callback_instance *)*handle_out;
3502  qb_list_del (&h->list);
3503  free (h);
3504  h = NULL;
3505  *handle_out = 0;
3506  }
3507 }
3508 
3509 static void token_callbacks_execute (
3510  struct totemsrp_instance *instance,
3511  enum totem_callback_token_type type)
3512 {
3513  struct qb_list_head *list, *tmp_iter;
3514  struct qb_list_head *callback_listhead = 0;
3515  struct token_callback_instance *token_callback_instance;
3516  int res;
3517  int del;
3518 
3519  switch (type) {
3521  callback_listhead = &instance->token_callback_received_listhead;
3522  break;
3524  callback_listhead = &instance->token_callback_sent_listhead;
3525  break;
3526  default:
3527  assert (0);
3528  }
3529 
3530  qb_list_for_each_safe(list, tmp_iter, callback_listhead) {
3531  token_callback_instance = qb_list_entry (list, struct token_callback_instance, list);
3533  if (del == 1) {
3534  qb_list_del (list);
3535  }
3536 
3540  /*
3541  * This callback failed to execute, try it again on the next token
3542  */
3543  if (res == -1 && del == 1) {
3544  qb_list_add (list, callback_listhead);
3545  } else if (del) {
3546  free (token_callback_instance);
3547  }
3548  }
3549 }
3550 
3551 /*
3552  * Flow control functions
3553  */
3554 static unsigned int backlog_get (struct totemsrp_instance *instance)
3555 {
3556  unsigned int backlog = 0;
3557  struct cs_queue *queue_use = NULL;
3558 
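 /*
  * The backlog is the number of messages waiting in whichever pending queue
  * is active for the current membership state; it is carried on the token
  * and feeds the flow control calculation.
  */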
3559  if (instance->memb_state == MEMB_STATE_OPERATIONAL) {
3560  if (instance->waiting_trans_ack) {
3561  queue_use = &instance->new_message_queue_trans;
3562  } else {
3563  queue_use = &instance->new_message_queue;
3564  }
3565  } else
3566  if (instance->memb_state == MEMB_STATE_RECOVERY) {
3567  queue_use = &instance->retrans_message_queue;
3568  }
3569 
3570  if (queue_use != NULL) {
3571  backlog = cs_queue_used (queue_use);
3572  }
3573 
3574  instance->stats.token[instance->stats.latest_token].backlog_calc = backlog;
3575  return (backlog);
3576 }
3577 
3578 static int fcc_calculate (
3579  struct totemsrp_instance *instance,
3580  struct orf_token *token)
3581 {
3582  unsigned int transmits_allowed;
3583  unsigned int backlog_calc;
3584 
3585  transmits_allowed = instance->totem_config->max_messages;
3586 
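 /*
  * Never allow more new multicasts than the space remaining in the flow
  * control window (window_size minus the fcc count of messages already
  * multicast during this token rotation).
  */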
3587  if (transmits_allowed > instance->totem_config->window_size - token->fcc) {
3588  transmits_allowed = instance->totem_config->window_size - token->fcc;
3589  }
3590 
3591  instance->my_cbl = backlog_get (instance);
3592 
3593  /*
3594  * Only do the backlog calculation if there is a backlog; otherwise
3595  * we would divide by zero
3596  */
3597  if (token->backlog + instance->my_cbl - instance->my_pbl) {
3598  backlog_calc = (instance->totem_config->window_size * instance->my_pbl) /
3599  (token->backlog + instance->my_cbl - instance->my_pbl);
3600  if (backlog_calc > 0 && transmits_allowed > backlog_calc) {
3601  transmits_allowed = backlog_calc;
3602  }
3603  }
3604 
3605  return (transmits_allowed);
3606 }
3607 
3608 /*
3609  * don't overflow the RTR sort queue
3610  */
3611 static void fcc_rtr_limit (
3612  struct totemsrp_instance *instance,
3613  struct orf_token *token,
3614  unsigned int *transmits_allowed)
3615 {
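 /*
  * The retransmit sort queue can hold at most QUEUE_RTR_ITEMS_SIZE_MAX
  * entries past last_released; once the token sequence number gets within
  * one window (plus the pending transmits) of that limit, stop originating
  * new multicasts so the queue cannot overflow.
  */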
3616  int check = QUEUE_RTR_ITEMS_SIZE_MAX;
3617  check -= (*transmits_allowed + instance->totem_config->window_size);
3618  assert (check >= 0);
3619  if (sq_lt_compare (instance->last_released +
3620  QUEUE_RTR_ITEMS_SIZE_MAX - *transmits_allowed -
3621  instance->totem_config->window_size,
3622 
3623  token->seq)) {
3624 
3625  *transmits_allowed = 0;
3626  }
3627 }
3628 
3629 static void fcc_token_update (
3630  struct totemsrp_instance *instance,
3631  struct orf_token *token,
3632  unsigned int msgs_transmitted)
3633 {
3634  token->fcc += msgs_transmitted - instance->my_trc;
3635  token->backlog += instance->my_cbl - instance->my_pbl;
3636  instance->my_trc = msgs_transmitted;
3637  instance->my_pbl = instance->my_cbl;
3638 }
3639 
3640 /*
3641  * Sanity checkers
3642  */
3643 static int check_orf_token_sanity(
3644  const struct totemsrp_instance *instance,
3645  const void *msg,
3646  size_t msg_len,
3647  int endian_conversion_needed)
3648 {
3649  int rtr_entries;
3650  const struct orf_token *token = (const struct orf_token *)msg;
3651  size_t required_len;
3652 
3653  if (msg_len < sizeof(struct orf_token)) {
3655  "Received orf_token message is too short... ignoring.");
3656 
3657  return (-1);
3658  }
3659 
3660  if (endian_conversion_needed) {
3661  rtr_entries = swab32(token->rtr_list_entries);
3662  } else {
3663  rtr_entries = token->rtr_list_entries;
3664  }
3665 
3666  required_len = sizeof(struct orf_token) + rtr_entries * sizeof(struct rtr_item);
3667  if (msg_len < required_len) {
3669  "Received orf_token message is too short... ignoring.");
3670 
3671  return (-1);
3672  }
3673 
3674  return (0);
3675 }
3676 
3677 static int check_mcast_sanity(
3678  struct totemsrp_instance *instance,
3679  const void *msg,
3680  size_t msg_len,
3681  int endian_conversion_needed)
3682 {
3683 
3684  if (msg_len < sizeof(struct mcast)) {
3686  "Received mcast message is too short... ignoring.");
3687 
3688  return (-1);
3689  }
3690 
3691  return (0);
3692 }
3693 
3694 static int check_memb_merge_detect_sanity(
3695  struct totemsrp_instance *instance,
3696  const void *msg,
3697  size_t msg_len,
3698  int endian_conversion_needed)
3699 {
3700 
3701  if (msg_len < sizeof(struct memb_merge_detect)) {
3703  "Received memb_merge_detect message is too short... ignoring.");
3704 
3705  return (-1);
3706  }
3707 
3708  return (0);
3709 }
3710 
3711 static int check_memb_join_sanity(
3712  struct totemsrp_instance *instance,
3713  const void *msg,
3714  size_t msg_len,
3715  int endian_conversion_needed)
3716 {
3717  const struct memb_join *mj_msg = (const struct memb_join *)msg;
3718  unsigned int proc_list_entries;
3719  unsigned int failed_list_entries;
3720  size_t required_len;
3721 
3722  if (msg_len < sizeof(struct memb_join)) {
3724  "Received memb_join message is too short... ignoring.");
3725 
3726  return (-1);
3727  }
3728 
3729  proc_list_entries = mj_msg->proc_list_entries;
3730  failed_list_entries = mj_msg->failed_list_entries;
3731 
3732  if (endian_conversion_needed) {
3733  proc_list_entries = swab32(proc_list_entries);
3734  failed_list_entries = swab32(failed_list_entries);
3735  }
3736 
3737  required_len = sizeof(struct memb_join) + ((proc_list_entries + failed_list_entries) * sizeof(struct srp_addr));
3738  if (msg_len < required_len) {
3740  "Received memb_join message is too short... ignoring.");
3741 
3742  return (-1);
3743  }
3744 
3745  return (0);
3746 }
3747 
3748 static int check_memb_commit_token_sanity(
3749  struct totemsrp_instance *instance,
3750  const void *msg,
3751  size_t msg_len,
3752  int endian_conversion_needed)
3753 {
3754  const struct memb_commit_token *mct_msg = (const struct memb_commit_token *)msg;
3755  unsigned int addr_entries;
3756  size_t required_len;
3757 
3758  if (msg_len < sizeof(struct memb_commit_token)) {
3760  "Received memb_commit_token message is too short... ignoring.");
3761 
3762  return (-1);
3763  }
3764 
3765  addr_entries= mct_msg->addr_entries;
3766  if (endian_conversion_needed) {
3767  addr_entries = swab32(addr_entries);
3768  }
3769 
3770  required_len = sizeof(struct memb_commit_token) +
3771  (addr_entries * (sizeof(struct srp_addr) + sizeof(struct memb_commit_token_memb_entry)));
3772  if (msg_len < required_len) {
3774  "Received memb_commit_token message is too short... ignoring.");
3775 
3776  return (-1);
3777  }
3778 
3779  return (0);
3780 }
3781 
3782 static int check_token_hold_cancel_sanity(
3783  struct totemsrp_instance *instance,
3784  const void *msg,
3785  size_t msg_len,
3786  int endian_conversion_needed)
3787 {
3788 
3789  if (msg_len < sizeof(struct token_hold_cancel)) {
3791  "Received token_hold_cancel message is too short... ignoring.");
3792 
3793  return (-1);
3794  }
3795 
3796  return (0);
3797 }
3798 
3799 /*
3800  * Message Handlers
3801  */
3802 
3803 unsigned long long int tv_old;
3804 /*
3805  * message handler called when TOKEN message type received
3806  */
3807 static int message_handler_orf_token (
3808  struct totemsrp_instance *instance,
3809  const void *msg,
3810  size_t msg_len,
3811  int endian_conversion_needed)
3812 {
3813  char token_storage[1500];
3814  char token_convert[1500];
3815  struct orf_token *token = NULL;
3816  int forward_token;
3817  unsigned int transmits_allowed;
3818  unsigned int mcasted_retransmit;
3819  unsigned int mcasted_regular;
3820  unsigned int last_aru;
3821 
3822 #ifdef GIVEINFO
3823  unsigned long long tv_current;
3824  unsigned long long tv_diff;
3825 
3826  tv_current = qb_util_nano_current_get ();
3827  tv_diff = tv_current - tv_old;
3828  tv_old = tv_current;
3829 
3831  "Time since last token %0.4f ms", ((float)tv_diff) / 1000000.0);
3832 #endif
3833 
3834  if (check_orf_token_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
3835  return (0);
3836  }
3837 
3838  if (instance->orf_token_discard) {
3839  return (0);
3840  }
3841 #ifdef TEST_DROP_ORF_TOKEN_PERCENTAGE
3842  if (random()%100 < TEST_DROP_ORF_TOKEN_PERCENTAGE) {
3843  return (0);
3844  }
3845 #endif
3846 
3847  if (endian_conversion_needed) {
3848  orf_token_endian_convert ((struct orf_token *)msg,
3849  (struct orf_token *)token_convert);
3850  msg = (struct orf_token *)token_convert;
3851  }
3852 
3853  /*
3854  * Make copy of token and retransmit list in case we have
3855  * to flush incoming messages from the kernel queue
3856  */
3857  token = (struct orf_token *)token_storage;
3858  memcpy (token, msg, sizeof (struct orf_token));
3859  memcpy (&token->rtr_list[0], (char *)msg + sizeof (struct orf_token),
3860  sizeof (struct rtr_item) * RETRANSMIT_ENTRIES_MAX);
3861 
3862 
3863  /*
3864  * Handle merge detection timeout
3865  */
3866  if (token->seq == instance->my_last_seq) {
3867  start_merge_detect_timeout (instance);
3868  instance->my_seq_unchanged += 1;
3869  } else {
3870  cancel_merge_detect_timeout (instance);
3871  cancel_token_hold_retransmit_timeout (instance);
3872  instance->my_seq_unchanged = 0;
3873  }
3874 
3875  instance->my_last_seq = token->seq;
3876 
3877 #ifdef TEST_RECOVERY_MSG_COUNT
3878  if (instance->memb_state == MEMB_STATE_OPERATIONAL && token->seq > TEST_RECOVERY_MSG_COUNT) {
3879  return (0);
3880  }
3881 #endif
3882  instance->flushing = 1;
3883  totemnet_recv_flush (instance->totemnet_context);
3884  instance->flushing = 0;
3885 
3886  /*
3887  * Determine if we should hold (in reality drop) the token
3888  */
3889  instance->my_token_held = 0;
3890  if (instance->my_ring_id.rep == instance->my_id.nodeid &&
3891  instance->my_seq_unchanged > instance->totem_config->seqno_unchanged_const) {
3892  instance->my_token_held = 1;
3893  } else {
3894  if (instance->my_ring_id.rep != instance->my_id.nodeid &&
3895  instance->my_seq_unchanged >= instance->totem_config->seqno_unchanged_const) {
3896  instance->my_token_held = 1;
3897  }
3898  }
3899 
3900  /*
3901  * Hold onto token when there is no activity on ring and
3902  * this processor is the ring rep
3903  */
3904  forward_token = 1;
3905  if (instance->my_ring_id.rep == instance->my_id.nodeid) {
3906  if (instance->my_token_held) {
3907  forward_token = 0;
3908  }
3909  }
3910 
3911  switch (instance->memb_state) {
3912  case MEMB_STATE_COMMIT:
3913  /* Discard token */
3914  break;
3915 
3917  messages_free (instance, token->aru);
3918  /*
3919  * Do NOT add break, this case should also execute code in gather case.
3920  */
3921 
3922  case MEMB_STATE_GATHER:
3923  /*
3924  * DO NOT add break, we use different free mechanism in recovery state
3925  */
3926 
3927  case MEMB_STATE_RECOVERY:
3928  /*
3929  * Discard tokens from another configuration
3930  */
3931  if (memcmp (&token->ring_id, &instance->my_ring_id,
3932  sizeof (struct memb_ring_id)) != 0) {
3933 
3934  if ((forward_token)
3935  && instance->use_heartbeat) {
3936  reset_heartbeat_timeout(instance);
3937  }
3938  else {
3939  cancel_heartbeat_timeout(instance);
3940  }
3941 
3942  return (0); /* discard token */
3943  }
3944 
3945  /*
3946  * Discard retransmitted tokens
3947  */
3948  if (sq_lte_compare (token->token_seq, instance->my_token_seq)) {
3949  return (0); /* discard token */
3950  }
3951 
3952  /*
3953  * Token is valid so trigger callbacks
3954  */
3955  token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_RECEIVED);
3956 
3957  last_aru = instance->my_last_aru;
3958  instance->my_last_aru = token->aru;
3959 
3960  transmits_allowed = fcc_calculate (instance, token);
3961  mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed);
3962 
3963  if (instance->my_token_held == 1 &&
3964  (token->rtr_list_entries > 0 || mcasted_retransmit > 0)) {
3965  instance->my_token_held = 0;
3966  forward_token = 1;
3967  }
3968 
3969  fcc_rtr_limit (instance, token, &transmits_allowed);
3970  mcasted_regular = orf_token_mcast (instance, token, transmits_allowed);
3971 /*
3972 if (mcasted_regular) {
3973 printf ("mcasted regular %d\n", mcasted_regular);
3974 printf ("token seq %d\n", token->seq);
3975 }
3976 */
3977  fcc_token_update (instance, token, mcasted_retransmit +
3978  mcasted_regular);
3979 
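 /*
  * Lower the token aru to our own aru when we are missing messages, or
  * refresh it if we were the processor that last set it (aru_addr); if the
  * aru stops advancing, my_aru_count grows and eventually triggers the
  * fail-to-receive handling below.
  */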
3980  if (sq_lt_compare (instance->my_aru, token->aru) ||
3981  instance->my_id.nodeid == token->aru_addr ||
3982  token->aru_addr == 0) {
3983 
3984  token->aru = instance->my_aru;
3985  if (token->aru == token->seq) {
3986  token->aru_addr = 0;
3987  } else {
3988  token->aru_addr = instance->my_id.nodeid;
3989  }
3990  }
3991  if (token->aru == last_aru && token->aru_addr != 0) {
3992  instance->my_aru_count += 1;
3993  } else {
3994  instance->my_aru_count = 0;
3995  }
3996 
3997  /*
3998  * We really don't follow the specification here. In the specification, OTHER
3999  * nodes detect the failure of a node (based on aru_count) and my_id IS NEVER
4000  * added to the failed list (so a node never marks itself as failed)
4001  */
4002  if (instance->my_aru_count > instance->totem_config->fail_to_recv_const &&
4003  token->aru_addr == instance->my_id.nodeid) {
4004 
4006  "FAILED TO RECEIVE");
4007 
4008  instance->failed_to_recv = 1;
4009 
4010  memb_set_merge (&instance->my_id, 1,
4011  instance->my_failed_list,
4012  &instance->my_failed_list_entries);
4013 
4014  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FAILED_TO_RECEIVE);
4015  } else {
4016  instance->my_token_seq = token->token_seq;
4017  token->token_seq += 1;
4018 
4019  if (instance->memb_state == MEMB_STATE_RECOVERY) {
4020  /*
4021  * instance->my_aru == instance->my_high_seq_received means this processor
4022  * has recovered all messages it can recover
4023  * (ie: its retrans queue is empty)
4024  */
4025  if (cs_queue_is_empty (&instance->retrans_message_queue) == 0) {
4026 
4027  if (token->retrans_flg == 0) {
4028  token->retrans_flg = 1;
4029  instance->my_set_retrans_flg = 1;
4030  }
4031  } else
4032  if (token->retrans_flg == 1 && instance->my_set_retrans_flg) {
4033  token->retrans_flg = 0;
4034  instance->my_set_retrans_flg = 0;
4035  }
4037  "token retrans flag is %d my set retrans flag%d retrans queue empty %d count %d, aru %x",
4038  token->retrans_flg, instance->my_set_retrans_flg,
4039  cs_queue_is_empty (&instance->retrans_message_queue),
4040  instance->my_retrans_flg_count, token->aru);
4041  if (token->retrans_flg == 0) {
4042  instance->my_retrans_flg_count += 1;
4043  } else {
4044  instance->my_retrans_flg_count = 0;
4045  }
4046  if (instance->my_retrans_flg_count == 2) {
4047  instance->my_install_seq = token->seq;
4048  }
4050  "install seq %x aru %x high seq received %x",
4051  instance->my_install_seq, instance->my_aru, instance->my_high_seq_received);
4052  if (instance->my_retrans_flg_count >= 2 &&
4053  instance->my_received_flg == 0 &&
4054  sq_lte_compare (instance->my_install_seq, instance->my_aru)) {
4055  instance->my_received_flg = 1;
4056  instance->my_deliver_memb_entries = instance->my_trans_memb_entries;
4057  memcpy (instance->my_deliver_memb_list, instance->my_trans_memb_list,
4058  sizeof (struct totem_ip_address) * instance->my_trans_memb_entries);
4059  }
4060  if (instance->my_retrans_flg_count >= 3 &&
4061  sq_lte_compare (instance->my_install_seq, token->aru)) {
4062  instance->my_rotation_counter += 1;
4063  } else {
4064  instance->my_rotation_counter = 0;
4065  }
4066  if (instance->my_rotation_counter == 2) {
4068  "retrans flag count %x token aru %x install seq %x aru %x %x",
4069  instance->my_retrans_flg_count, token->aru, instance->my_install_seq,
4070  instance->my_aru, token->seq);
4071 
4072  memb_state_operational_enter (instance);
4073  instance->my_rotation_counter = 0;
4074  instance->my_retrans_flg_count = 0;
4075  }
4076  }
4077 
4079  token_send (instance, token, forward_token);
4080 
4081 #ifdef GIVEINFO
4082  tv_current = qb_util_nano_current_get ();
4083  tv_diff = tv_current - tv_old;
4084  tv_old = tv_current;
4086  "I held %0.4f ms",
4087  ((float)tv_diff) / 1000000.0);
4088 #endif
4089  if (instance->memb_state == MEMB_STATE_OPERATIONAL) {
4090  messages_deliver_to_app (instance, 0,
4091  instance->my_high_seq_received);
4092  }
4093 
4094  /*
4095  * Deliver messages after token has been transmitted
4096  * to improve performance
4097  */
4098  reset_token_timeout (instance); // REVIEWED
4099  reset_token_retransmit_timeout (instance); // REVIEWED
4100  if (instance->my_id.nodeid == instance->my_ring_id.rep &&
4101  instance->my_token_held == 1) {
4102 
4103  start_token_hold_retransmit_timeout (instance);
4104  }
4105 
4106  token_callbacks_execute (instance, TOTEM_CALLBACK_TOKEN_SENT);
4107  }
4108  break;
4109  }
4110 
4111  if ((forward_token)
4112  && instance->use_heartbeat) {
4113  reset_heartbeat_timeout(instance);
4114  }
4115  else {
4116  cancel_heartbeat_timeout(instance);
4117  }
4118 
4119  return (0);
4120 }
4121 
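/*
 * Editor's note (illustrative sketch, not part of totemsrp.c): the handler above
 * discards a token whose token_seq is not newer than the last one seen, using the
 * wrap-around safe comparison provided by sq.h (sq_lte_compare).  The helper below
 * shows the usual shape of such a comparison; the name seq_lte and the fixed
 * 32-bit width are assumptions made for this sketch only.
 */
#include <stdint.h>

static int seq_lte (uint32_t a, uint32_t b)
{
        /* a <= b in circular arithmetic: the signed distance from a to b is non-negative */
        return ((int32_t)(b - a)) >= 0;
}

/* Example: seq_lte (0xfffffffeu, 0x00000001u) == 1 even though the counter wrapped. */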
4122 static void messages_deliver_to_app (
4123  struct totemsrp_instance *instance,
4124  int skip,
4125  unsigned int end_point)
4126 {
4127  struct sort_queue_item *sort_queue_item_p;
4128  unsigned int i;
4129  int res;
4130  struct mcast *mcast_in;
4131  struct mcast mcast_header;
4132  unsigned int range = 0;
4133  int endian_conversion_required;
4134  unsigned int my_high_delivered_stored = 0;
4135  struct srp_addr aligned_system_from;
4136 
4137  range = end_point - instance->my_high_delivered;
4138 
4139  if (range) {
4141  "Delivering %x to %x", instance->my_high_delivered,
4142  end_point);
4143  }
4144  assert (range < QUEUE_RTR_ITEMS_SIZE_MAX);
4145  my_high_delivered_stored = instance->my_high_delivered;
4146 
4147  /*
4148  * Deliver messages in order from rtr queue to pending delivery queue
4149  */
4150  for (i = 1; i <= range; i++) {
4151 
4152  void *ptr = 0;
4153 
4154  /*
4155  * If out of range of sort queue, stop assembly
4156  */
4157  res = sq_in_range (&instance->regular_sort_queue,
4158  my_high_delivered_stored + i);
4159  if (res == 0) {
4160  break;
4161  }
4162 
4163  res = sq_item_get (&instance->regular_sort_queue,
4164  my_high_delivered_stored + i, &ptr);
4165  /*
4166  * If hole, stop assembly
4167  */
4168  if (res != 0 && skip == 0) {
4169  break;
4170  }
4171 
4172  instance->my_high_delivered = my_high_delivered_stored + i;
4173 
4174  if (res != 0) {
4175  continue;
4176 
4177  }
4178 
4179  sort_queue_item_p = ptr;
4180 
4181  mcast_in = sort_queue_item_p->mcast;
4182  assert (mcast_in != (struct mcast *)0xdeadbeef);
4183 
4184  endian_conversion_required = 0;
4185  if (mcast_in->header.magic != TOTEM_MH_MAGIC) {
4186  endian_conversion_required = 1;
4187  mcast_endian_convert (mcast_in, &mcast_header);
4188  } else {
4189  memcpy (&mcast_header, mcast_in, sizeof (struct mcast));
4190  }
4191 
4192  aligned_system_from = mcast_header.system_from;
4193 
4194  /*
4195  * Skip messages not originated in instance->my_deliver_memb
4196  */
4197  if (skip &&
4198  memb_set_subset (&aligned_system_from,
4199  1,
4200  instance->my_deliver_memb_list,
4201  instance->my_deliver_memb_entries) == 0) {
4202 
4203  instance->my_high_delivered = my_high_delivered_stored + i;
4204 
4205  continue;
4206  }
4207 
4208  /*
4209  * Message found
4210  */
4212  "Delivering MCAST message with seq %x to pending delivery queue",
4213  mcast_header.seq);
4214 
4215  /*
4216  * Message is locally originated multicast
4217  */
4218  instance->totemsrp_deliver_fn (
4219  mcast_header.header.nodeid,
4220  ((char *)sort_queue_item_p->mcast) + sizeof (struct mcast),
4221  sort_queue_item_p->msg_len - sizeof (struct mcast),
4222  endian_conversion_required);
4223  }
4224 }
4225 
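/*
 * Editor's note (illustrative sketch, not part of totemsrp.c): messages_deliver_to_app()
 * above walks the sort queue from my_high_delivered + 1 towards end_point and stops at
 * the first missing sequence number, so the application only ever sees messages in order.
 * The toy ring buffer below shows the same pattern; struct slot and deliver_in_order()
 * are invented for this sketch and are not corosync identifiers.
 */
#include <stddef.h>

struct slot {
        int present;            /* has the message with this sequence number arrived? */
        const void *msg;
};

/* Deliver consecutive messages starting at high_delivered + 1; return the new high mark. */
static unsigned int deliver_in_order (struct slot *ring, size_t ring_size,
        unsigned int high_delivered, unsigned int end_point,
        void (*deliver) (const void *msg))
{
        unsigned int seq;

        for (seq = high_delivered + 1; seq != end_point + 1; seq++) {
                struct slot *s = &ring[seq % ring_size];

                if (!s->present) {
                        break;          /* hole: later messages wait for retransmission */
                }
                deliver (s->msg);
                s->present = 0;
                high_delivered = seq;
        }
        return high_delivered;
}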
4226 /*
4227  * recv message handler called when MCAST message type received
4228  */
4229 static int message_handler_mcast (
4230  struct totemsrp_instance *instance,
4231  const void *msg,
4232  size_t msg_len,
4233  int endian_conversion_needed)
4234 {
4235  struct sort_queue_item sort_queue_item;
4236  struct sq *sort_queue;
4237  struct mcast mcast_header;
4238  struct srp_addr aligned_system_from;
4239 
4240  if (check_mcast_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
4241  return (0);
4242  }
4243 
4244  if (endian_conversion_needed) {
4245  mcast_endian_convert (msg, &mcast_header);
4246  } else {
4247  memcpy (&mcast_header, msg, sizeof (struct mcast));
4248  }
4249 
4250  if (mcast_header.header.encapsulated == MESSAGE_ENCAPSULATED) {
4251  sort_queue = &instance->recovery_sort_queue;
4252  } else {
4253  sort_queue = &instance->regular_sort_queue;
4254  }
4255 
4256  assert (msg_len <= FRAME_SIZE_MAX);
4257 
4258 #ifdef TEST_DROP_MCAST_PERCENTAGE
4259  if (random()%100 < TEST_DROP_MCAST_PERCENTAGE) {
4260  return (0);
4261  }
4262 #endif
4263 
4264  /*
4265  * If the message is foreign, execute the switch below
4266  */
4267  if (memcmp (&instance->my_ring_id, &mcast_header.ring_id,
4268  sizeof (struct memb_ring_id)) != 0) {
4269 
4270  aligned_system_from = mcast_header.system_from;
4271 
4272  switch (instance->memb_state) {
4273  case MEMB_STATE_OPERATIONAL:
4274  memb_set_merge (
4275  &aligned_system_from, 1,
4276  instance->my_proc_list, &instance->my_proc_list_entries);
4277  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE);
4278  break;
4279 
4280  case MEMB_STATE_GATHER:
4281  if (!memb_set_subset (
4282  &aligned_system_from,
4283  1,
4284  instance->my_proc_list,
4285  instance->my_proc_list_entries)) {
4286 
4287  memb_set_merge (&aligned_system_from, 1,
4288  instance->my_proc_list, &instance->my_proc_list_entries);
4289  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE);
4290  return (0);
4291  }
4292  break;
4293 
4294  case MEMB_STATE_COMMIT:
4295  /* discard message */
4296  instance->stats.rx_msg_dropped++;
4297  break;
4298 
4299  case MEMB_STATE_RECOVERY:
4300  /* discard message */
4301  instance->stats.rx_msg_dropped++;
4302  break;
4303  }
4304  return (0);
4305  }
4306 
4308  "Received ringid (" CS_PRI_RING_ID ") seq %x",
4309  mcast_header.ring_id.rep,
4310  (uint64_t)mcast_header.ring_id.seq,
4311  mcast_header.seq);
4312 
4313  /*
4314  * Add the mcast message to the rtr queue if it is not already in the rtr queue,
4315  * otherwise free the io vectors
4316  */
4317  if (msg_len > 0 && msg_len <= FRAME_SIZE_MAX &&
4318  sq_in_range (sort_queue, mcast_header.seq) &&
4319  sq_item_inuse (sort_queue, mcast_header.seq) == 0) {
4320 
4321  /*
4322  * Allocate new multicast memory block
4323  */
4324 // TODO LEAK
4325  sort_queue_item.mcast = totemsrp_buffer_alloc (instance);
4326  if (sort_queue_item.mcast == NULL) {
4327  return (-1); /* error here is corrected by the algorithm */
4328  }
4329  memcpy (sort_queue_item.mcast, msg, msg_len);
4330  sort_queue_item.msg_len = msg_len;
4331 
4332  if (sq_lt_compare (instance->my_high_seq_received,
4333  mcast_header.seq)) {
4334  instance->my_high_seq_received = mcast_header.seq;
4335  }
4336 
4337  sq_item_add (sort_queue, &sort_queue_item, mcast_header.seq);
4338  }
4339 
4340  update_aru (instance);
4341  if (instance->memb_state == MEMB_STATE_OPERATIONAL) {
4342  messages_deliver_to_app (instance, 0, instance->my_high_seq_received);
4343  }
4344 
4345 /* TODO remove from retrans message queue for old ring in recovery state */
4346  return (0);
4347 }
4348 
4349 static int message_handler_memb_merge_detect (
4350  struct totemsrp_instance *instance,
4351  const void *msg,
4352  size_t msg_len,
4353  int endian_conversion_needed)
4354 {
4355  struct memb_merge_detect memb_merge_detect;
4356  struct srp_addr aligned_system_from;
4357 
4358  if (check_memb_merge_detect_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
4359  return (0);
4360  }
4361 
4362  if (endian_conversion_needed) {
4363  memb_merge_detect_endian_convert (msg, &memb_merge_detect);
4364  } else {
4365  memcpy (&memb_merge_detect, msg,
4366  sizeof (struct memb_merge_detect));
4367  }
4368 
4369  /*
4370  * do nothing if this is a merge detect from this configuration
4371  */
4372  if (memcmp (&instance->my_ring_id, &memb_merge_detect.ring_id,
4373  sizeof (struct memb_ring_id)) == 0) {
4374 
4375  return (0);
4376  }
4377 
4378  aligned_system_from = memb_merge_detect.system_from;
4379 
4380  /*
4381  * Execute merge operation
4382  */
4383  switch (instance->memb_state) {
4384  case MEMB_STATE_OPERATIONAL:
4385  memb_set_merge (&aligned_system_from, 1,
4386  instance->my_proc_list, &instance->my_proc_list_entries);
4387  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE);
4388  break;
4389 
4390  case MEMB_STATE_GATHER:
4391  if (!memb_set_subset (
4392  &aligned_system_from,
4393  1,
4394  instance->my_proc_list,
4395  instance->my_proc_list_entries)) {
4396 
4397  memb_set_merge (&aligned_system_from, 1,
4398  instance->my_proc_list, &instance->my_proc_list_entries);
4399  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE);
4400  return (0);
4401  }
4402  break;
4403 
4404  case MEMB_STATE_COMMIT:
4405  /* do nothing in commit */
4406  break;
4407 
4408  case MEMB_STATE_RECOVERY:
4409  /* do nothing in recovery */
4410  break;
4411  }
4412  return (0);
4413 }
4414 
4415 static void memb_join_process (
4416  struct totemsrp_instance *instance,
4417  const struct memb_join *memb_join)
4418 {
4419  struct srp_addr *proc_list;
4420  struct srp_addr *failed_list;
4421  int gather_entered = 0;
4422  int fail_minus_memb_entries = 0;
4423  struct srp_addr fail_minus_memb[PROCESSOR_COUNT_MAX];
4424  struct srp_addr aligned_system_from;
4425 
4426  proc_list = (struct srp_addr *)memb_join->end_of_memb_join;
4427  failed_list = proc_list + memb_join->proc_list_entries;
4428  aligned_system_from = memb_join->system_from;
4429 
4430  log_printf(instance->totemsrp_log_level_trace, "memb_join_process");
4431  memb_set_log(instance, instance->totemsrp_log_level_trace,
4432  "proclist", proc_list, memb_join->proc_list_entries);
4433  memb_set_log(instance, instance->totemsrp_log_level_trace,
4434  "faillist", failed_list, memb_join->failed_list_entries);
4435  memb_set_log(instance, instance->totemsrp_log_level_trace,
4436  "my_proclist", instance->my_proc_list, instance->my_proc_list_entries);
4437  memb_set_log(instance, instance->totemsrp_log_level_trace,
4438  "my_faillist", instance->my_failed_list, instance->my_failed_list_entries);
4439 
4441  if (instance->flushing) {
4444  "Discarding LEAVE message during flush, nodeid=" CS_PRI_NODE_ID,
4446  if (memb_join->failed_list_entries > 0) {
4447  my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].nodeid);
4448  }
4449  } else {
4451  "Discarding JOIN message during flush, nodeid=" CS_PRI_NODE_ID, memb_join->header.nodeid);
4452  }
4453  return;
4454  } else {
4457  "Received LEAVE message from " CS_PRI_NODE_ID, memb_join->failed_list_entries > 0 ? failed_list[memb_join->failed_list_entries - 1 ].nodeid : LEAVE_DUMMY_NODEID);
4458  if (memb_join->failed_list_entries > 0) {
4459  my_leave_memb_set(instance, failed_list[memb_join->failed_list_entries - 1 ].nodeid);
4460  }
4461  }
4462  }
4463 
4464  }
4465 
4466  if (memb_set_equal (proc_list,
4467  memb_join->proc_list_entries,
4468  instance->my_proc_list,
4469  instance->my_proc_list_entries) &&
4470 
4471  memb_set_equal (failed_list,
4472  memb_join->failed_list_entries,
4473  instance->my_failed_list,
4474  instance->my_failed_list_entries)) {
4475 
4477  memb_consensus_set (instance, &aligned_system_from);
4478  }
4479 
4480  if (memb_consensus_agreed (instance) && instance->failed_to_recv == 1) {
4481  instance->failed_to_recv = 0;
4482  instance->my_proc_list[0] = instance->my_id;
4483  instance->my_proc_list_entries = 1;
4484  instance->my_failed_list_entries = 0;
4485 
4486  memb_state_commit_token_create (instance);
4487 
4488  memb_state_commit_enter (instance);
4489  return;
4490  }
4491  if (memb_consensus_agreed (instance) &&
4492  memb_lowest_in_config (instance)) {
4493 
4494  memb_state_commit_token_create (instance);
4495 
4496  memb_state_commit_enter (instance);
4497  } else {
4498  goto out;
4499  }
4500  } else
4501  if (memb_set_subset (proc_list,
4502  memb_join->proc_list_entries,
4503  instance->my_proc_list,
4504  instance->my_proc_list_entries) &&
4505 
4506  memb_set_subset (failed_list,
4507  memb_join->failed_list_entries,
4508  instance->my_failed_list,
4509  instance->my_failed_list_entries)) {
4510 
4511  goto out;
4512  } else
4513  if (memb_set_subset (&aligned_system_from, 1,
4514  instance->my_failed_list, instance->my_failed_list_entries)) {
4515 
4516  goto out;
4517  } else {
4518  memb_set_merge (proc_list,
4519  memb_join->proc_list_entries,
4520  instance->my_proc_list, &instance->my_proc_list_entries);
4521 
4522  if (memb_set_subset (
4523  &instance->my_id, 1,
4524  failed_list, memb_join->failed_list_entries)) {
4525 
4526  memb_set_merge (
4527  &aligned_system_from, 1,
4528  instance->my_failed_list, &instance->my_failed_list_entries);
4529  } else {
4530  if (memb_set_subset (
4531  &aligned_system_from, 1,
4532  instance->my_memb_list,
4533  instance->my_memb_entries)) {
4534 
4535  if (memb_set_subset (
4536  &aligned_system_from, 1,
4537  instance->my_failed_list,
4538  instance->my_failed_list_entries) == 0) {
4539 
4540  memb_set_merge (failed_list,
4541  memb_join->failed_list_entries,
4542  instance->my_failed_list, &instance->my_failed_list_entries);
4543  } else {
4544  memb_set_subtract (fail_minus_memb,
4545  &fail_minus_memb_entries,
4546  failed_list,
4547  memb_join->failed_list_entries,
4548  instance->my_memb_list,
4549  instance->my_memb_entries);
4550 
4551  memb_set_merge (fail_minus_memb,
4552  fail_minus_memb_entries,
4553  instance->my_failed_list,
4554  &instance->my_failed_list_entries);
4555  }
4556  }
4557  }
4558  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_MERGE_DURING_JOIN);
4559  gather_entered = 1;
4560  }
4561 
4562 out:
4563  if (gather_entered == 0 &&
4564  instance->memb_state == MEMB_STATE_OPERATIONAL) {
4565 
4566  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE);
4567  }
4568 }
4569 
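/*
 * Editor's note (illustrative sketch, not part of totemsrp.c): memb_join_process()
 * above repeatedly merges the sender's proc and failed lists into the local ones with
 * memb_set_merge(), which behaves like a set union over node identities.  The toy
 * helper below shows that operation on plain node-id arrays; nodeid_set_merge() is an
 * invented name and the caller is assumed to guarantee capacity in the target array.
 */
static void nodeid_set_merge (const unsigned int *from, int from_entries,
        unsigned int *to, int *to_entries)
{
        int i, j, found;

        for (i = 0; i < from_entries; i++) {
                found = 0;
                for (j = 0; j < *to_entries; j++) {
                        if (to[j] == from[i]) {
                                found = 1;
                                break;
                        }
                }
                if (!found) {
                        to[(*to_entries)++] = from[i];
                }
        }
}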
4570 static void memb_join_endian_convert (const struct memb_join *in, struct memb_join *out)
4571 {
4572  int i;
4573  struct srp_addr *in_proc_list;
4574  struct srp_addr *in_failed_list;
4575  struct srp_addr *out_proc_list;
4576  struct srp_addr *out_failed_list;
4577 
4578  out->header.magic = TOTEM_MH_MAGIC;
4580  out->header.type = in->header.type;
4581  out->header.nodeid = swab32 (in->header.nodeid);
4582  out->system_from = srp_addr_endian_convert(in->system_from);
4583  out->proc_list_entries = swab32 (in->proc_list_entries);
4584  out->failed_list_entries = swab32 (in->failed_list_entries);
4585  out->ring_seq = swab64 (in->ring_seq);
4586 
4587  in_proc_list = (struct srp_addr *)in->end_of_memb_join;
4588  in_failed_list = in_proc_list + out->proc_list_entries;
4589  out_proc_list = (struct srp_addr *)out->end_of_memb_join;
4590  out_failed_list = out_proc_list + out->proc_list_entries;
4591 
4592  for (i = 0; i < out->proc_list_entries; i++) {
4593  out_proc_list[i] = srp_addr_endian_convert (in_proc_list[i]);
4594  }
4595  for (i = 0; i < out->failed_list_entries; i++) {
4596  out_failed_list[i] = srp_addr_endian_convert (in_failed_list[i]);
4597  }
4598 }
4599 
4600 static void memb_commit_token_endian_convert (const struct memb_commit_token *in, struct memb_commit_token *out)
4601 {
4602  int i;
4603  struct srp_addr *in_addr = (struct srp_addr *)in->end_of_commit_token;
4604  struct srp_addr *out_addr = (struct srp_addr *)out->end_of_commit_token;
4605  struct memb_commit_token_memb_entry *in_memb_list;
4606  struct memb_commit_token_memb_entry *out_memb_list;
4607 
4608  out->header.magic = TOTEM_MH_MAGIC;
4610  out->header.type = in->header.type;
4611  out->header.nodeid = swab32 (in->header.nodeid);
4612  out->token_seq = swab32 (in->token_seq);
4613  out->ring_id.rep = swab32(in->ring_id.rep);
4614  out->ring_id.seq = swab64 (in->ring_id.seq);
4615  out->retrans_flg = swab32 (in->retrans_flg);
4616  out->memb_index = swab32 (in->memb_index);
4617  out->addr_entries = swab32 (in->addr_entries);
4618 
4619  in_memb_list = (struct memb_commit_token_memb_entry *)(in_addr + out->addr_entries);
4620  out_memb_list = (struct memb_commit_token_memb_entry *)(out_addr + out->addr_entries);
4621  for (i = 0; i < out->addr_entries; i++) {
4622  out_addr[i] = srp_addr_endian_convert (in_addr[i]);
4623 
4624  /*
4625  * Only convert the memb entry if it has been set
4626  */
4627  if (in_memb_list[i].ring_id.rep != 0) {
4628  out_memb_list[i].ring_id.rep = swab32(in_memb_list[i].ring_id.rep);
4629 
4630  out_memb_list[i].ring_id.seq =
4631  swab64 (in_memb_list[i].ring_id.seq);
4632  out_memb_list[i].aru = swab32 (in_memb_list[i].aru);
4633  out_memb_list[i].high_delivered = swab32 (in_memb_list[i].high_delivered);
4634  out_memb_list[i].received_flg = swab32 (in_memb_list[i].received_flg);
4635  }
4636  }
4637 }
4638 
4639 static void orf_token_endian_convert (const struct orf_token *in, struct orf_token *out)
4640 {
4641  int i;
4642 
4643  out->header.magic = TOTEM_MH_MAGIC;
4645  out->header.type = in->header.type;
4646  out->header.nodeid = swab32 (in->header.nodeid);
4647  out->seq = swab32 (in->seq);
4648  out->token_seq = swab32 (in->token_seq);
4649  out->aru = swab32 (in->aru);
4650  out->ring_id.rep = swab32(in->ring_id.rep);
4651  out->aru_addr = swab32(in->aru_addr);
4652  out->ring_id.seq = swab64 (in->ring_id.seq);
4653  out->fcc = swab32 (in->fcc);
4654  out->backlog = swab32 (in->backlog);
4655  out->retrans_flg = swab32 (in->retrans_flg);
4656  out->rtr_list_entries = swab32 (in->rtr_list_entries);
4657  for (i = 0; i < out->rtr_list_entries; i++) {
4658  out->rtr_list[i].ring_id.rep = swab32(in->rtr_list[i].ring_id.rep);
4659  out->rtr_list[i].ring_id.seq = swab64 (in->rtr_list[i].ring_id.seq);
4660  out->rtr_list[i].seq = swab32 (in->rtr_list[i].seq);
4661  }
4662 }
4663 
4664 static void mcast_endian_convert (const struct mcast *in, struct mcast *out)
4665 {
4666  out->header.magic = TOTEM_MH_MAGIC;
4668  out->header.type = in->header.type;
4669  out->header.nodeid = swab32 (in->header.nodeid);
4671 
4672  out->seq = swab32 (in->seq);
4673  out->this_seqno = swab32 (in->this_seqno);
4674  out->ring_id.rep = swab32(in->ring_id.rep);
4675  out->ring_id.seq = swab64 (in->ring_id.seq);
4676  out->node_id = swab32 (in->node_id);
4677  out->guarantee = swab32 (in->guarantee);
4678  out->system_from = srp_addr_endian_convert(in->system_from);
4679 }
4680 
4681 static void memb_merge_detect_endian_convert (
4682  const struct memb_merge_detect *in,
4683  struct memb_merge_detect *out)
4684 {
4685  out->header.magic = TOTEM_MH_MAGIC;
4687  out->header.type = in->header.type;
4688  out->header.nodeid = swab32 (in->header.nodeid);
4689  out->ring_id.rep = swab32(in->ring_id.rep);
4690  out->ring_id.seq = swab64 (in->ring_id.seq);
4691  out->system_from = srp_addr_endian_convert (in->system_from);
4692 }
4693 
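/*
 * Editor's note (illustrative sketch, not part of totemsrp.c): the *_endian_convert()
 * helpers above are only invoked when a received header's 16-bit magic does not match
 * the host's value, i.e. when the byte-swapped magic matches instead.  The check below
 * shows that idea in isolation; MAGIC_EXAMPLE, swap16() and needs_byteswap() are made
 * up for this sketch and are not corosync identifiers.
 */
#include <stdint.h>

#define MAGIC_EXAMPLE 0x3b31u

static uint16_t swap16 (uint16_t v)
{
        return (uint16_t)((v >> 8) | (v << 8));
}

/* 0: already in host order, 1: needs conversion, -1: not a recognised frame */
static int needs_byteswap (uint16_t received_magic)
{
        if (received_magic == MAGIC_EXAMPLE) {
                return 0;
        }
        if (swap16 (received_magic) == MAGIC_EXAMPLE) {
                return 1;
        }
        return -1;
}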
4694 static int ignore_join_under_operational (
4695  struct totemsrp_instance *instance,
4696  const struct memb_join *memb_join)
4697 {
4698  struct srp_addr *proc_list;
4699  struct srp_addr *failed_list;
4700  unsigned long long ring_seq;
4701  struct srp_addr aligned_system_from;
4702 
4703  proc_list = (struct srp_addr *)memb_join->end_of_memb_join;
4704  failed_list = proc_list + memb_join->proc_list_entries;
4705  ring_seq = memb_join->ring_seq;
4706  aligned_system_from = memb_join->system_from;
4707 
4708  if (memb_set_subset (&instance->my_id, 1,
4709  failed_list, memb_join->failed_list_entries)) {
4710  return (1);
4711  }
4712 
4713  /*
4714  * In operational state, my_proc_list is exactly the same as
4715  * my_memb_list.
4716  */
4717  if ((memb_set_subset (&aligned_system_from, 1,
4718  instance->my_memb_list, instance->my_memb_entries)) &&
4719  (ring_seq < instance->my_ring_id.seq)) {
4720  return (1);
4721  }
4722 
4723  return (0);
4724 }
4725 
4726 static int message_handler_memb_join (
4727  struct totemsrp_instance *instance,
4728  const void *msg,
4729  size_t msg_len,
4730  int endian_conversion_needed)
4731 {
4732  const struct memb_join *memb_join;
4733  struct memb_join *memb_join_convert = alloca (msg_len);
4734  struct srp_addr aligned_system_from;
4735 
4736  if (check_memb_join_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
4737  return (0);
4738  }
4739 
4740  if (endian_conversion_needed) {
4741  memb_join = memb_join_convert;
4742  memb_join_endian_convert (msg, memb_join_convert);
4743 
4744  } else {
4745  memb_join = msg;
4746  }
4747 
4748  aligned_system_from = memb_join->system_from;
4749 
4750  /*
4751  * If the process paused because it wasn't scheduled in a timely
4752  * fashion, flush the join messages because they may be stale
4753  * entries queued while the process was paused
4754  */
4755  if (pause_flush (instance)) {
4756  return (0);
4757  }
4758 
4759  if (instance->token_ring_id_seq < memb_join->ring_seq) {
4760  instance->token_ring_id_seq = memb_join->ring_seq;
4761  }
4762  switch (instance->memb_state) {
4763  case MEMB_STATE_OPERATIONAL:
4764  if (!ignore_join_under_operational (instance, memb_join)) {
4765  memb_join_process (instance, memb_join);
4766  }
4767  break;
4768 
4769  case MEMB_STATE_GATHER:
4770  memb_join_process (instance, memb_join);
4771  break;
4772 
4773  case MEMB_STATE_COMMIT:
4774  if (memb_set_subset (&aligned_system_from,
4775  1,
4776  instance->my_new_memb_list,
4777  instance->my_new_memb_entries) &&
4778 
4779  memb_join->ring_seq >= instance->my_ring_id.seq) {
4780 
4781  memb_join_process (instance, memb_join);
4782  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE);
4783  }
4784  break;
4785 
4786  case MEMB_STATE_RECOVERY:
4787  if (memb_set_subset (&aligned_system_from,
4788  1,
4789  instance->my_new_memb_list,
4790  instance->my_new_memb_entries) &&
4791 
4792  memb_join->ring_seq >= instance->my_ring_id.seq) {
4793 
4794  memb_join_process (instance, memb_join);
4795  memb_recovery_state_token_loss (instance);
4796  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY);
4797  }
4798  break;
4799  }
4800  return (0);
4801 }
4802 
4803 static int message_handler_memb_commit_token (
4804  struct totemsrp_instance *instance,
4805  const void *msg,
4806  size_t msg_len,
4807  int endian_conversion_needed)
4808 {
4809  struct memb_commit_token *memb_commit_token_convert = alloca (msg_len);
4810  struct memb_commit_token *memb_commit_token;
4811  struct srp_addr sub[PROCESSOR_COUNT_MAX];
4812  int sub_entries;
4813 
4814  struct srp_addr *addr;
4815 
4817  "got commit token");
4818 
4819  if (check_memb_commit_token_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
4820  return (0);
4821  }
4822 
4823  if (endian_conversion_needed) {
4824  memb_commit_token_endian_convert (msg, memb_commit_token_convert);
4825  } else {
4826  memcpy (memb_commit_token_convert, msg, msg_len);
4827  }
4828  memb_commit_token = memb_commit_token_convert;
4829  addr = (struct srp_addr *)memb_commit_token->end_of_commit_token;
4830 
4831 #ifdef TEST_DROP_COMMIT_TOKEN_PERCENTAGE
4832  if (random()%100 < TEST_DROP_COMMIT_TOKEN_PERCENTAGE) {
4833  return (0);
4834  }
4835 #endif
4836  switch (instance->memb_state) {
4837  case MEMB_STATE_OPERATIONAL:
4838  /* discard token */
4839  break;
4840 
4841  case MEMB_STATE_GATHER:
4842  memb_set_subtract (sub, &sub_entries,
4843  instance->my_proc_list, instance->my_proc_list_entries,
4844  instance->my_failed_list, instance->my_failed_list_entries);
4845 
4846  if (memb_set_equal (addr,
4847  memb_commit_token->addr_entries,
4848  sub,
4849  sub_entries) &&
4850 
4851  memb_commit_token->ring_id.seq > instance->my_ring_id.seq) {
4852  memcpy (instance->commit_token, memb_commit_token, msg_len);
4853  memb_state_commit_enter (instance);
4854  }
4855  break;
4856 
4857  case MEMB_STATE_COMMIT:
4858  /*
4859  * If retransmitted commit tokens are sent on this ring,
4860  * filter them out and only enter recovery once the
4861  * commit token has traversed the array. This is
4862  * determined by:
4863  * memb_commit_token->memb_index == memb_commit_token->addr_entries
4864  */
4865  if (memb_commit_token->ring_id.seq == instance->my_ring_id.seq &&
4866  memb_commit_token->memb_index == memb_commit_token->addr_entries) {
4867  memb_state_recovery_enter (instance, memb_commit_token);
4868  }
4869  break;
4870 
4871  case MEMB_STATE_RECOVERY:
4872  if (instance->my_id.nodeid == instance->my_ring_id.rep) {
4873 
4874  /* Filter out duplicated tokens */
4875  if (instance->originated_orf_token) {
4876  break;
4877  }
4878 
4879  instance->originated_orf_token = 1;
4880 
4882  "Sending initial ORF token");
4883 
4884  // TODO convert instead of initiate
4885  orf_token_send_initial (instance);
4886  reset_token_timeout (instance); // REVIEWED
4887  reset_token_retransmit_timeout (instance); // REVIEWED
4888  }
4889  break;
4890  }
4891  return (0);
4892 }
4893 
4894 static int message_handler_token_hold_cancel (
4895  struct totemsrp_instance *instance,
4896  const void *msg,
4897  size_t msg_len,
4898  int endian_conversion_needed)
4899 {
4900  const struct token_hold_cancel *token_hold_cancel = msg;
4901 
4902  if (check_token_hold_cancel_sanity(instance, msg, msg_len, endian_conversion_needed) == -1) {
4903  return (0);
4904  }
4905 
4906  if (memcmp (&token_hold_cancel->ring_id, &instance->my_ring_id,
4907  sizeof (struct memb_ring_id)) == 0) {
4908 
4909  instance->my_seq_unchanged = 0;
4910  if (instance->my_ring_id.rep == instance->my_id.nodeid) {
4911  timer_function_token_retransmit_timeout (instance);
4912  }
4913  }
4914  return (0);
4915 }
4916 
4917 static int check_message_header_validity(
4918  void *context,
4919  const void *msg,
4920  unsigned int msg_len,
4921  const struct sockaddr_storage *system_from)
4922 {
4923  struct totemsrp_instance *instance = context;
4924  const struct totem_message_header *message_header = msg;
4925  const char *guessed_str;
4926  const char *msg_byte = msg;
4927 
4928  if (msg_len < sizeof (struct totem_message_header)) {
4930  "Message received from %s is too short... Ignoring %u.",
4931  totemip_sa_print((struct sockaddr *)system_from), (unsigned int)msg_len);
4932  return (-1);
4933  }
4934 
4935  if (message_header->magic != TOTEM_MH_MAGIC &&
4936  message_header->magic != swab16(TOTEM_MH_MAGIC)) {
4937  /*
4938  * We've received either Knet, an old version of Corosync,
4939  * or something else. Do some guessing to display a (hopefully)
4940  * helpful message
4941  */
4942  guessed_str = NULL;
4943 
4944  if (message_header->magic == 0xFFFF) {
4945  /*
4946  * Corosync 2.2 used a header starting with two UINT8_MAX bytes
4947  */
4948  guessed_str = "Corosync 2.2";
4949  } else if (message_header->magic == 0xFEFE) {
4950  /*
4951  * Corosync 2.3+ used a header starting with two (UINT8_MAX - 1) bytes
4952  */
4953  guessed_str = "Corosync 2.3+";
4954  } else if (msg_byte[0] == 0x01) {
4955  /*
4956  * The Knet stable1 protocol has the first byte of the message == 1
4957  */
4958  guessed_str = "unencrypted Kronosnet";
4959  } else if (msg_byte[0] >= 0 && msg_byte[0] <= 5) {
4960  /*
4961  * Unencrypted Corosync 1.x/OpenAIS has a first byte of
4962  * 0-5. This collides with Knet (but is still worth a try)
4963  */
4964  guessed_str = "unencrypted Corosync 2.0/2.1/1.x/OpenAIS";
4965  } else {
4966  /*
4967  * An encrypted Kronosnet packet has a hash at the end of
4968  * the packet and nothing specific at the beginning of the
4969  * packet (just encrypted data).
4970  * Encrypted Corosync 1.x/OpenAIS is quite similar, but the hash_digest
4971  * is at the beginning of the packet.
4972  *
4973  * So it's not possible to reliably detect either of them.
4974  */
4975  guessed_str = "encrypted Kronosnet/Corosync 2.0/2.1/1.x/OpenAIS or unknown";
4976  }
4977 
4979  "Message received from %s has bad magic number (probably sent by %s).. Ignoring",
4980  totemip_sa_print((struct sockaddr *)system_from),
4981  guessed_str);
4982 
4983  return (-1);
4984  }
4985 
4986  if (message_header->version != TOTEM_MH_VERSION) {
4988  "Message received from %s has unsupported version %u... Ignoring",
4989  totemip_sa_print((struct sockaddr *)system_from),
4990  message_header->version);
4991 
4992  return (-1);
4993  }
4994 
4995  return (0);
4996 }
4997 
4998 
4999 void main_deliver_fn (
5000  void *context,
5001  const void *msg,
5002  unsigned int msg_len,
5003  const struct sockaddr_storage *system_from)
5004 {
5005  struct totemsrp_instance *instance = context;
5006  const struct totem_message_header *message_header = msg;
5007 
5008  if (check_message_header_validity(context, msg, msg_len, system_from) == -1) {
5009  return ;
5010  }
5011 
5012  switch (message_header->type) {
5013  case MESSAGE_TYPE_ORF_TOKEN:
5014  instance->stats.orf_token_rx++;
5015  break;
5016  case MESSAGE_TYPE_MCAST:
5017  instance->stats.mcast_rx++;
5018  break;
5019  case MESSAGE_TYPE_MEMB_MERGE_DETECT:
5020  instance->stats.memb_merge_detect_rx++;
5021  break;
5022  case MESSAGE_TYPE_MEMB_JOIN:
5023  instance->stats.memb_join_rx++;
5024  break;
5025  case MESSAGE_TYPE_MEMB_COMMIT_TOKEN:
5026  instance->stats.memb_commit_token_rx++;
5027  break;
5028  case MESSAGE_TYPE_TOKEN_HOLD_CANCEL:
5029  instance->stats.token_hold_cancel_rx++;
5030  break;
5031  default:
5033  "Message received from %s has wrong type... ignoring %d.\n",
5034  totemip_sa_print((struct sockaddr *)system_from),
5035  (int)message_header->type);
5036 
5037  instance->stats.rx_msg_dropped++;
5038  return;
5039  }
5040  /*
5041  * Handle incoming message
5042  */
5043  totemsrp_message_handlers.handler_functions[(int)message_header->type] (
5044  instance,
5045  msg,
5046  msg_len,
5047  message_header->magic != TOTEM_MH_MAGIC);
5048 }
5049 
5050 int totemsrp_iface_set (
5051  void *context,
5052  const struct totem_ip_address *interface_addr,
5053  unsigned short ip_port,
5054  unsigned int iface_no)
5055 {
5056  struct totemsrp_instance *instance = context;
5057  int res;
5058 
5059  totemip_copy(&instance->my_addrs[iface_no], interface_addr);
5060 
5061  res = totemnet_iface_set (
5062  instance->totemnet_context,
5063  interface_addr,
5064  ip_port,
5065  iface_no);
5066 
5067  return (res);
5068 }
5069 
5070 /* Contrary to its name, this only gets called when the interface is enabled */
5071 void main_iface_change_fn (
5072  void *context,
5073  const struct totem_ip_address *iface_addr,
5074  unsigned int iface_no)
5075 {
5076  struct totemsrp_instance *instance = context;
5077  int num_interfaces;
5078  int i;
5079 
5080  if (!instance->my_id.nodeid) {
5081  instance->my_id.nodeid = iface_addr->nodeid;
5082  }
5083  totemip_copy (&instance->my_addrs[iface_no], iface_addr);
5084 
5085  if (instance->iface_changes++ == 0) {
5086  instance->memb_ring_id_create_or_load (&instance->my_ring_id, instance->my_id.nodeid);
5087  /*
5088  * Increase the ring_id sequence number. This doesn't follow the specification.
5089  * It solves the problem of a leader node (the node with the lowest nodeid) restarting before
5090  * the rest of the cluster forms a new membership, and it guarantees a unique ring_id for
5091  * the new singleton configuration.
5092  */
5093  instance->my_ring_id.seq++;
5094 
5095  instance->token_ring_id_seq = instance->my_ring_id.seq;
5096  log_printf (
5097  instance->totemsrp_log_level_debug,
5098  "Created or loaded sequence id " CS_PRI_RING_ID " for this ring.",
5099  instance->my_ring_id.rep,
5100  (uint64_t)instance->my_ring_id.seq);
5101 
5102  if (instance->totemsrp_service_ready_fn) {
5103  instance->totemsrp_service_ready_fn ();
5104  }
5105 
5106  }
5107 
5108  num_interfaces = 0;
5109  for (i = 0; i < INTERFACE_MAX; i++) {
5110  if (instance->totem_config->interfaces[i].configured) {
5111  num_interfaces++;
5112  }
5113  }
5114 
5115  if (instance->iface_changes >= num_interfaces) {
5116  /* We need to clear orig_interfaces so that 'commit' diffs against nothing */
5117  instance->totem_config->orig_interfaces = malloc (sizeof (struct totem_interface) * INTERFACE_MAX);
5118  assert(instance->totem_config->orig_interfaces != NULL);
5119  memset(instance->totem_config->orig_interfaces, 0, sizeof (struct totem_interface) * INTERFACE_MAX);
5120 
5121  totemconfig_commit_new_params(instance->totem_config, icmap_get_global_map());
5122 
5123  memb_state_gather_enter (instance, TOTEMSRP_GSFROM_INTERFACE_CHANGE);
5124  free(instance->totem_config->orig_interfaces);
5125  }
5126 }
5127 
5128 void totemsrp_net_mtu_adjust (struct totem_config *totem_config) {
5129  totem_config->net_mtu -= 2 * sizeof (struct mcast);
5130 }
5131 
5132 void totemsrp_service_ready_register (
5133  void *context,
5134  void (*totem_service_ready) (void))
5135 {
5136  struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
5137 
5138  instance->totemsrp_service_ready_fn = totem_service_ready;
5139 }
5140 
5141 int totemsrp_member_add (
5142  void *context,
5143  const struct totem_ip_address *member,
5144  int iface_no)
5145 {
5146  struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
5147  int res;
5148 
5149  res = totemnet_member_add (instance->totemnet_context, &instance->my_addrs[iface_no], member, iface_no);
5150 
5151  return (res);
5152 }
5153 
5154 int totemsrp_member_remove (
5155  void *context,
5156  const struct totem_ip_address *member,
5157  int iface_no)
5158 {
5159  struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
5160  int res;
5161 
5162  res = totemnet_member_remove (instance->totemnet_context, member, iface_no);
5163 
5164  return (res);
5165 }
5166 
5167 void totemsrp_threaded_mode_enable (void *context)
5168 {
5169  struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
5170 
5171  instance->threaded_mode_enabled = 1;
5172 }
5173 
5174 void totemsrp_trans_ack (void *context)
5175 {
5176  struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
5177 
5178  instance->waiting_trans_ack = 0;
5179  instance->totemsrp_waiting_trans_ack_cb_fn (0);
5180 }
5181 
5182 
5183 int totemsrp_reconfigure (void *context, struct totem_config *totem_config)
5184 {
5185  struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
5186  int res;
5187 
5188  res = totemnet_reconfigure (instance->totemnet_context, totem_config);
5189  return (res);
5190 }
5191 
5192 int totemsrp_crypto_reconfigure_phase (void *context, struct totem_config *totem_config, cfg_message_crypto_reconfig_phase_t phase)
5193 {
5194  struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
5195  int res;
5196 
5197  res = totemnet_crypto_reconfigure_phase (instance->totemnet_context, totem_config, phase);
5198  return (res);
5199 }
5200 
5201 void totemsrp_stats_clear (void *context, int flags)
5202 {
5203  struct totemsrp_instance *instance = (struct totemsrp_instance *)context;
5204 
5205  memset(&instance->stats, 0, sizeof(totemsrp_stats_t));
5206  if (flags & TOTEMPG_STATS_CLEAR_TRANSPORT) {
5207  totemnet_stats_clear (instance->totemnet_context);
5208  }
5209 }
5210 
5211 void totemsrp_force_gather (void *context)
5212 {
5213  timer_function_orf_token_timeout(context);
5214 }
totem_configuration_type
The totem_configuration_type enum.
Definition: coroapi.h:132
@ TOTEM_CONFIGURATION_REGULAR
Definition: coroapi.h:133
@ TOTEM_CONFIGURATION_TRANSITIONAL
Definition: coroapi.h:134
#define INTERFACE_MAX
Definition: coroapi.h:88
#define MESSAGE_QUEUE_MAX
Definition: coroapi.h:98
unsigned int nodeid
Definition: coroapi.h:0
unsigned char addr[TOTEMIP_ADDRLEN]
Definition: coroapi.h:2
totem_callback_token_type
The totem_callback_token_type enum.
Definition: coroapi.h:142
@ TOTEM_CALLBACK_TOKEN_SENT
Definition: coroapi.h:144
@ TOTEM_CALLBACK_TOKEN_RECEIVED
Definition: coroapi.h:143
#define PROCESSOR_COUNT_MAX
Definition: coroapi.h:96
#define CS_PRI_RING_ID_SEQ
Definition: corotypes.h:60
#define CS_PRI_NODE_ID
Definition: corotypes.h:59
#define CS_PRI_RING_ID
Definition: corotypes.h:61
uint32_t flags
uint32_t value
icmap_map_t icmap_get_global_map(void)
Return global icmap.
Definition: icmap.c:264
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:76
struct srp_addr addr
Definition: totemsrp.c:164
int guarantee
Definition: totemsrp.c:190
unsigned int node_id
Definition: totemsrp.c:189
struct memb_ring_id ring_id
Definition: totemsrp.c:188
struct totem_message_header header
Definition: totemsrp.c:184
unsigned int seq
Definition: totemsrp.c:186
int this_seqno
Definition: totemsrp.c:187
struct srp_addr system_from
Definition: totemsrp.c:185
Definition: totemsrp.c:243
unsigned int aru
Definition: totemsrp.c:245
unsigned int received_flg
Definition: totemsrp.c:247
struct memb_ring_id ring_id
Definition: totemsrp.c:244
unsigned int high_delivered
Definition: totemsrp.c:246
unsigned int retrans_flg
Definition: totemsrp.c:255
struct totem_message_header header
Definition: totemsrp.c:252
unsigned char end_of_commit_token[0]
Definition: totemsrp.c:258
unsigned int token_seq
Definition: totemsrp.c:253
struct memb_ring_id ring_id
Definition: totemsrp.c:254
struct srp_addr system_from
Definition: totemsrp.c:217
struct totem_message_header header
Definition: totemsrp.c:216
unsigned char end_of_memb_join[0]
Definition: totemsrp.c:221
unsigned long long ring_seq
Definition: totemsrp.c:220
unsigned int failed_list_entries
Definition: totemsrp.c:219
unsigned int proc_list_entries
Definition: totemsrp.c:218
struct totem_message_header header
Definition: totemsrp.c:231
struct memb_ring_id ring_id
Definition: totemsrp.c:233
struct srp_addr system_from
Definition: totemsrp.c:232
The memb_ring_id struct.
Definition: coroapi.h:122
unsigned long long seq
Definition: coroapi.h:124
unsigned int rep
Definition: totem.h:150
int(* handler_functions[6])(struct totemsrp_instance *instance, const void *msg, size_t msg_len, int endian_conversion_needed)
Definition: totemsrp.c:535
unsigned int msg_len
Definition: totemsrp.c:269
struct mcast * mcast
Definition: totemsrp.c:268
unsigned int backlog
Definition: totemsrp.c:207
unsigned int token_seq
Definition: totemsrp.c:203
unsigned int aru_addr
Definition: totemsrp.c:205
unsigned int fcc
Definition: totemsrp.c:208
unsigned int aru
Definition: totemsrp.c:204
int rtr_list_entries
Definition: totemsrp.c:210
struct rtr_item rtr_list[0]
Definition: totemsrp.c:211
int retrans_flg
Definition: totemsrp.c:209
unsigned int seq
Definition: totemsrp.c:202
struct totem_message_header header
Definition: totemsrp.c:201
struct memb_ring_id ring_id
Definition: totemsrp.c:206
struct memb_ring_id ring_id
Definition: totemsrp.c:195
unsigned int seq
Definition: totemsrp.c:196
unsigned int msg_len
Definition: totemsrp.c:274
struct mcast * mcast
Definition: totemsrp.c:273
The sq struct.
Definition: sq.h:43
unsigned int nodeid
Definition: totemsrp.c:108
struct qb_list_head list
Definition: totemsrp.c:170
int(* callback_fn)(enum totem_callback_token_type type, const void *)
Definition: totemsrp.c:171
enum totem_callback_token_type callback_type
Definition: totemsrp.c:172
struct totem_message_header header
Definition: totemsrp.c:238
struct memb_ring_id ring_id
Definition: totemsrp.c:239
unsigned int max_messages
Definition: totem.h:219
unsigned int heartbeat_failures_allowed
Definition: totem.h:213
unsigned int token_timeout
Definition: totem.h:181
unsigned int window_size
Definition: totem.h:217
struct totem_logging_configuration totem_logging_configuration
Definition: totem.h:207
unsigned int downcheck_timeout
Definition: totem.h:199
unsigned int miss_count_const
Definition: totem.h:241
struct totem_interface * interfaces
Definition: totem.h:165
unsigned int fail_to_recv_const
Definition: totem.h:201
unsigned int merge_timeout
Definition: totem.h:197
struct totem_interface * orig_interfaces
Definition: totem.h:166
unsigned int net_mtu
Definition: totem.h:209
void(* totem_memb_ring_id_create_or_load)(struct memb_ring_id *memb_ring_id, unsigned int nodeid)
Definition: totem.h:247
unsigned int token_retransmits_before_loss_const
Definition: totem.h:189
unsigned int max_network_delay
Definition: totem.h:215
unsigned int seqno_unchanged_const
Definition: totem.h:203
unsigned int consensus_timeout
Definition: totem.h:195
unsigned int threads
Definition: totem.h:211
unsigned int send_join_timeout
Definition: totem.h:193
void(* totem_memb_ring_id_store)(const struct memb_ring_id *memb_ring_id, unsigned int nodeid)
Definition: totem.h:251
unsigned int token_retransmit_timeout
Definition: totem.h:185
unsigned int token_warning
Definition: totem.h:183
unsigned int join_timeout
Definition: totem.h:191
unsigned int token_hold_timeout
Definition: totem.h:187
struct totem_ip_address boundto
Definition: totem.h:84
uint8_t configured
Definition: totem.h:89
int member_count
Definition: totem.h:90
struct totem_ip_address member_list[PROCESSOR_COUNT_MAX]
Definition: totem.h:97
struct totem_ip_address mcast_addr
Definition: totem.h:85
The totem_ip_address struct.
Definition: coroapi.h:111
unsigned int nodeid
Definition: coroapi.h:112
unsigned short family
Definition: coroapi.h:113
void(* log_printf)(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format,...) __attribute__((format(printf
Definition: totem.h:101
void(*) in log_level_security)
Definition: totem.h:108
unsigned int nodeid
Definition: totem.h:131
unsigned short magic
Definition: totem.h:127
struct totem_ip_address mcast_address
Definition: totemsrp.c:452
totemsrp_stats_t stats
Definition: totemsrp.c:516
struct srp_addr my_left_memb_list[PROCESSOR_COUNT_MAX]
Definition: totemsrp.c:320
int consensus_list_entries
Definition: totemsrp.c:300
int my_rotation_counter
Definition: totemsrp.c:358
int my_merge_detect_timeout_outstanding
Definition: totemsrp.c:346
unsigned int my_last_seq
Definition: totemsrp.c:496
qb_loop_timer_handle timer_heartbeat_timeout
Definition: totemsrp.c:419
int my_retrans_flg_count
Definition: totemsrp.c:362
unsigned int my_token_seq
Definition: totemsrp.c:396
qb_loop_timer_handle memb_timer_state_gather_join_timeout
Definition: totemsrp.c:413
struct consensus_list_item consensus_list[PROCESSOR_COUNT_MAX]
Definition: totemsrp.c:298
int totemsrp_subsys_id
Definition: totemsrp.c:436
qb_loop_timer_handle memb_timer_state_gather_consensus_timeout
Definition: totemsrp.c:415
uint64_t pause_timestamp
Definition: totemsrp.c:512
uint32_t threaded_mode_enabled
Definition: totemsrp.c:522
struct srp_addr my_memb_list[PROCESSOR_COUNT_MAX]
Definition: totemsrp.c:316
void * totemnet_context
Definition: totemsrp.c:500
int my_leave_memb_entries
Definition: totemsrp.c:338
struct srp_addr my_proc_list[PROCESSOR_COUNT_MAX]
Definition: totemsrp.c:308
int my_set_retrans_flg
Definition: totemsrp.c:360
struct srp_addr my_new_memb_list[PROCESSOR_COUNT_MAX]
Definition: totemsrp.c:312
int my_failed_list_entries
Definition: totemsrp.c:326
struct srp_addr my_failed_list[PROCESSOR_COUNT_MAX]
Definition: totemsrp.c:310
unsigned int use_heartbeat
Definition: totemsrp.c:504
void(* memb_ring_id_create_or_load)(struct memb_ring_id *memb_ring_id, unsigned int nodeid)
Definition: totemsrp.c:472
void(*) enum memb_stat memb_state)
Definition: totemsrp.c:446
qb_loop_timer_handle memb_timer_state_commit_timeout
Definition: totemsrp.c:417
struct cs_queue new_message_queue
Definition: totemsrp.c:371
int orf_token_retransmit_size
Definition: totemsrp.c:394
int my_proc_list_entries
Definition: totemsrp.c:324
unsigned int my_high_seq_received
Definition: totemsrp.c:354
void(* totemsrp_deliver_fn)(unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required)
Definition: totemsrp.c:454
int old_ring_state_aru
Definition: totemsrp.c:492
uint32_t orf_token_discard
Definition: totemsrp.c:518
struct qb_list_head token_callback_sent_listhead
Definition: totemsrp.c:390
unsigned int last_released
Definition: totemsrp.c:486
unsigned int set_aru
Definition: totemsrp.c:488
int totemsrp_log_level_notice
Definition: totemsrp.c:430
struct cs_queue new_message_queue_trans
Definition: totemsrp.c:373
int totemsrp_log_level_trace
Definition: totemsrp.c:434
char orf_token_retransmit[TOKEN_SIZE_MAX]
Definition: totemsrp.c:392
unsigned int my_trc
Definition: totemsrp.c:506
struct cs_queue retrans_message_queue
Definition: totemsrp.c:375
struct memb_ring_id my_ring_id
Definition: totemsrp.c:340
int totemsrp_log_level_error
Definition: totemsrp.c:426
void(* totemsrp_waiting_trans_ack_cb_fn)(int waiting_trans_ack)
Definition: totemsrp.c:469
unsigned int old_ring_state_high_seq_received
Definition: totemsrp.c:494
int fcc_remcast_current
Definition: totemsrp.c:296
qb_loop_timer_handle timer_orf_token_hold_retransmit_timeout
Definition: totemsrp.c:409
qb_loop_timer_handle timer_pause_timeout
Definition: totemsrp.c:401
unsigned int my_high_ring_delivered
Definition: totemsrp.c:364
qb_loop_timer_handle timer_orf_token_retransmit_timeout
Definition: totemsrp.c:407
struct totem_config * totem_config
Definition: totemsrp.c:502
int my_deliver_memb_entries
Definition: totemsrp.c:334
void(* totemsrp_service_ready_fn)(void)
Definition: totemsrp.c:467
int my_trans_memb_entries
Definition: totemsrp.c:330
uint32_t originated_orf_token
Definition: totemsrp.c:520
void * token_recv_event_handle
Definition: totemsrp.c:528
struct sq recovery_sort_queue
Definition: totemsrp.c:379
qb_loop_timer_handle timer_orf_token_timeout
Definition: totemsrp.c:403
qb_loop_timer_handle timer_merge_detect_timeout
Definition: totemsrp.c:411
int old_ring_state_saved
Definition: totemsrp.c:490
unsigned int my_leave_memb_list[PROCESSOR_COUNT_MAX]
Definition: totemsrp.c:322
void * token_sent_event_handle
Definition: totemsrp.c:529
unsigned int my_high_delivered
Definition: totemsrp.c:386
int totemsrp_log_level_security
Definition: totemsrp.c:424
int totemsrp_log_level_warning
Definition: totemsrp.c:428
struct memb_commit_token * commit_token
Definition: totemsrp.c:514
char commit_token_storage[40000]
Definition: totemsrp.c:530
struct memb_ring_id my_old_ring_id
Definition: totemsrp.c:342
struct timeval tv_old
Definition: totemsrp.c:498
int my_left_memb_entries
Definition: totemsrp.c:336
void(* memb_ring_id_store)(const struct memb_ring_id *memb_ring_id, unsigned int nodeid)
Definition: totemsrp.c:476
qb_loop_t * totemsrp_poll_handle
Definition: totemsrp.c:450
unsigned int my_install_seq
Definition: totemsrp.c:356
qb_loop_timer_handle timer_orf_token_warning
Definition: totemsrp.c:405
struct srp_addr my_trans_memb_list[PROCESSOR_COUNT_MAX]
Definition: totemsrp.c:314
struct srp_addr my_id
Definition: totemsrp.c:304
unsigned int my_cbl
Definition: totemsrp.c:510
int my_new_memb_entries
Definition: totemsrp.c:328
struct qb_list_head token_callback_received_listhead
Definition: totemsrp.c:388
unsigned int my_last_aru
Definition: totemsrp.c:348
unsigned int my_aru
Definition: totemsrp.c:384
uint32_t waiting_trans_ack
Definition: totemsrp.c:524
void(* totemsrp_log_printf)(int level, int subsys, const char *function, const char *file, int line, const char *format,...) __attribute__((format(printf
Definition: totemsrp.c:438
void(* totemsrp_confchg_fn)(enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id)
Definition: totemsrp.c:460
struct sq regular_sort_queue
Definition: totemsrp.c:377
unsigned long long token_ring_id_seq
Definition: totemsrp.c:484
struct srp_addr my_deliver_memb_list[PROCESSOR_COUNT_MAX]
Definition: totemsrp.c:318
int totemsrp_log_level_debug
Definition: totemsrp.c:432
unsigned int my_pbl
Definition: totemsrp.c:508
struct totem_ip_address my_addrs[INTERFACE_MAX]
Definition: totemsrp.c:306
uint64_t memb_join_tx
Definition: totemstats.h:59
uint32_t continuous_gather
Definition: totemstats.h:78
uint64_t recovery_entered
Definition: totemstats.h:74
uint64_t rx_msg_dropped
Definition: totemstats.h:77
uint64_t gather_entered
Definition: totemstats.h:70
uint64_t memb_commit_token_rx
Definition: totemstats.h:65
uint64_t mcast_retx
Definition: totemstats.h:62
uint64_t mcast_tx
Definition: totemstats.h:61
uint64_t memb_commit_token_tx
Definition: totemstats.h:64
uint64_t operational_token_lost
Definition: totemstats.h:69
uint64_t operational_entered
Definition: totemstats.h:68
uint64_t gather_token_lost
Definition: totemstats.h:71
uint64_t commit_token_lost
Definition: totemstats.h:73
uint64_t token_hold_cancel_tx
Definition: totemstats.h:66
uint64_t orf_token_rx
Definition: totemstats.h:56
totemsrp_token_stats_t token[TOTEM_TOKEN_STATS_MAX]
Definition: totemstats.h:90
uint64_t recovery_token_lost
Definition: totemstats.h:75
uint64_t commit_entered
Definition: totemstats.h:72
uint64_t memb_merge_detect_rx
Definition: totemstats.h:58
uint64_t memb_join_rx
Definition: totemstats.h:60
uint64_t orf_token_tx
Definition: totemstats.h:55
uint64_t memb_merge_detect_tx
Definition: totemstats.h:57
uint64_t mcast_rx
Definition: totemstats.h:63
uint64_t token_hold_cancel_rx
Definition: totemstats.h:67
uint64_t consensus_timeouts
Definition: totemstats.h:76
#define swab64(x)
The swab64 macro.
Definition: swab.h:65
#define swab16(x)
The swab16 macro.
Definition: swab.h:39
#define swab32(x)
The swab32 macro.
Definition: swab.h:51
totem_event_type
Definition: totem.h:268
#define TOTEM_MH_VERSION
Definition: totem.h:124
#define FRAME_SIZE_MAX
Definition: totem.h:52
cfg_message_crypto_reconfig_phase_t
Definition: totem.h:154
#define TOTEM_MH_MAGIC
Definition: totem.h:123
char type
Definition: totem.h:2
void totemconfig_commit_new_params(struct totem_config *totem_config, icmap_map_t map)
Definition: totemconfig.c:2364
void totemip_copy(struct totem_ip_address *addr1, const struct totem_ip_address *addr2)
Definition: totemip.c:123
const char * totemip_sa_print(const struct sockaddr *sa)
Definition: totemip.c:234
int totemnet_iface_set(void *net_context, const struct totem_ip_address *interface_addr, unsigned short ip_port, unsigned int iface_no)
Definition: totemnet.c:463
int totemnet_member_remove(void *net_context, const struct totem_ip_address *member, int ring_no)
Definition: totemnet.c:532
int totemnet_token_send(void *net_context, const void *msg, unsigned int msg_len)
Definition: totemnet.c:406
int totemnet_send_flush(void *net_context)
Definition: totemnet.c:396
int totemnet_initialize(qb_loop_t *loop_pt, void **net_context, struct totem_config *totem_config, totemsrp_stats_t *stats, void *context, void(*deliver_fn)(void *context, const void *msg, unsigned int msg_len, const struct sockaddr_storage *system_from), void(*iface_change_fn)(void *context, const struct totem_ip_address *iface_address, unsigned int ring_no), void(*mtu_changed)(void *context, int net_mtu), void(*target_set_completed)(void *context))
Definition: totemnet.c:309
void totemnet_buffer_release(void *net_context, void *ptr)
Definition: totemnet.c:367
int totemnet_mcast_flush_send(void *net_context, const void *msg, unsigned int msg_len)
Definition: totemnet.c:418
int totemnet_member_add(void *net_context, const struct totem_ip_address *local, const struct totem_ip_address *member, int ring_no)
Definition: totemnet.c:512
int totemnet_finalize(void *net_context)
Definition: totemnet.c:298
int totemnet_crypto_set(void *net_context, const char *cipher_type, const char *hash_type)
Definition: totemnet.c:284
int totemnet_ifaces_get(void *net_context, char ***status, unsigned int *iface_count)
Definition: totemnet.c:476
int totemnet_processor_count_set(void *net_context, int processor_count)
Definition: totemnet.c:375
int totemnet_token_target_set(void *net_context, unsigned int nodeid)
Definition: totemnet.c:489
int totemnet_recv_flush(void *net_context)
Definition: totemnet.c:386
int totemnet_iface_check(void *net_context)
Definition: totemnet.c:444
int totemnet_mcast_noflush_send(void *net_context, const void *msg, unsigned int msg_len)
Definition: totemnet.c:431
int totemnet_recv_mcast_empty(void *net_context)
Definition: totemnet.c:501
void totemnet_stats_clear(void *net_context)
Definition: totemnet.c:598
int totemnet_reconfigure(void *net_context, struct totem_config *totem_config)
Definition: totemnet.c:568
void * totemnet_buffer_alloc(void *net_context)
Definition: totemnet.c:359
int totemnet_crypto_reconfigure_phase(void *net_context, struct totem_config *totem_config, cfg_message_crypto_reconfig_phase_t phase)
Definition: totemnet.c:582
Totem Network interface - also does encryption/decryption.
int totemsrp_my_family_get(void *srp_context)
Definition: totemsrp.c:1113
#define SEQNO_START_TOKEN
Definition: totemsrp.c:122
unsigned long long ring_seq
Definition: totemsrp.c:4
#define RETRANSMIT_ENTRIES_MAX
Definition: totemsrp.c:100
unsigned long long int tv_old
Definition: totemsrp.c:3803
unsigned int seq
Definition: totemsrp.c:2
#define log_printf(level, format, args...)
Definition: totemsrp.c:690
void totemsrp_force_gather(void *context)
Definition: totemsrp.c:5211
int rtr_list_entries
Definition: totemsrp.c:9
void totemsrp_service_ready_register(void *context, void(*totem_service_ready)(void))
Definition: totemsrp.c:5132
int totemsrp_initialize(qb_loop_t *poll_handle, void **srp_context, struct totem_config *totem_config, totempg_stats_t *stats, void(*deliver_fn)(unsigned int nodeid, const void *msg, unsigned int msg_len, int endian_conversion_required), void(*confchg_fn)(enum totem_configuration_type configuration_type, const unsigned int *member_list, size_t member_list_entries, const unsigned int *left_list, size_t left_list_entries, const unsigned int *joined_list, size_t joined_list_entries, const struct memb_ring_id *ring_id), void(*waiting_trans_ack_cb_fn)(int waiting_trans_ack))
Create a protocol instance.
Definition: totemsrp.c:819
int totemsrp_callback_token_create(void *srp_context, void **handle_out, enum totem_callback_token_type type, int delete, int(*callback_fn)(enum totem_callback_token_type type, const void *), const void *data)
Definition: totemsrp.c:3461
#define RETRANS_MESSAGE_QUEUE_SIZE_MAX
Definition: totemsrp.c:97
void totemsrp_threaded_mode_enable(void *context)
Definition: totemsrp.c:5167
struct rtr_item rtr_list[0]
Definition: totemsrp.c:10
message_type
Definition: totemsrp.c:146
@ MESSAGE_TYPE_MEMB_COMMIT_TOKEN
Definition: totemsrp.c:151
@ MESSAGE_TYPE_TOKEN_HOLD_CANCEL
Definition: totemsrp.c:152
@ MESSAGE_TYPE_ORF_TOKEN
Definition: totemsrp.c:147
@ MESSAGE_TYPE_MEMB_JOIN
Definition: totemsrp.c:150
@ MESSAGE_TYPE_MEMB_MERGE_DETECT
Definition: totemsrp.c:149
@ MESSAGE_TYPE_MCAST
Definition: totemsrp.c:148
void totemsrp_net_mtu_adjust(struct totem_config *totem_config)
Definition: totemsrp.c:5128
#define TOKEN_SIZE_MAX
Definition: totemsrp.c:101
encapsulation_type
Definition: totemsrp.c:155
@ MESSAGE_NOT_ENCAPSULATED
Definition: totemsrp.c:157
@ MESSAGE_ENCAPSULATED
Definition: totemsrp.c:156
unsigned int failed_list_entries
Definition: totemsrp.c:3
struct message_handlers totemsrp_message_handlers
Definition: totemsrp.c:678
#define LEAVE_DUMMY_NODEID
Definition: totemsrp.c:102
#define QUEUE_RTR_ITEMS_SIZE_MAX
Definition: totemsrp.c:96
void main_iface_change_fn(void *context, const struct totem_ip_address *iface_address, unsigned int iface_no)
Definition: totemsrp.c:5071
int guarantee
Definition: totemsrp.c:6
unsigned int aru
Definition: totemsrp.c:3
gather_state_from
Definition: totemsrp.c:542
@ TOTEMSRP_GSFROM_THE_CONSENSUS_TIMEOUT_EXPIRED
Definition: totemsrp.c:546
@ TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_GATHER_STATE
Definition: totemsrp.c:551
@ TOTEMSRP_GSFROM_FAILED_TO_RECEIVE
Definition: totemsrp.c:549
@ TOTEMSRP_GSFROM_CONSENSUS_TIMEOUT
Definition: totemsrp.c:543
@ TOTEMSRP_GSFROM_MERGE_DURING_OPERATIONAL_STATE
Definition: totemsrp.c:552
@ TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_OPERATIONAL_STATE
Definition: totemsrp.c:545
@ TOTEMSRP_GSFROM_MERGE_DURING_JOIN
Definition: totemsrp.c:554
@ TOTEMSRP_GSFROM_INTERFACE_CHANGE
Definition: totemsrp.c:558
@ TOTEMSRP_GSFROM_GATHER_MISSING1
Definition: totemsrp.c:544
@ TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_COMMIT_STATE
Definition: totemsrp.c:547
@ TOTEMSRP_GSFROM_MAX
Definition: totemsrp.c:559
@ TOTEMSRP_GSFROM_JOIN_DURING_COMMIT_STATE
Definition: totemsrp.c:556
@ TOTEMSRP_GSFROM_THE_TOKEN_WAS_LOST_IN_THE_RECOVERY_STATE
Definition: totemsrp.c:548
@ TOTEMSRP_GSFROM_FOREIGN_MESSAGE_IN_OPERATIONAL_STATE
Definition: totemsrp.c:550
@ TOTEMSRP_GSFROM_JOIN_DURING_OPERATIONAL_STATE
Definition: totemsrp.c:555
@ TOTEMSRP_GSFROM_MERGE_DURING_GATHER_STATE
Definition: totemsrp.c:553
@ TOTEMSRP_GSFROM_JOIN_DURING_RECOVERY
Definition: totemsrp.c:557
int totemsrp_crypto_set(void *srp_context, const char *cipher_type, const char *hash_type)
Definition: totemsrp.c:1088
int totemsrp_avail(void *srp_context)
Return number of available messages that can be queued.
Definition: totemsrp.c:2534
void totemsrp_stats_clear(void *context, int flags)
Definition: totemsrp.c:5201
int totemsrp_iface_set(void *context, const struct totem_ip_address *interface_addr, unsigned short ip_port, unsigned int iface_no)
Definition: totemsrp.c:5050
void totemsrp_finalize(void *srp_context)
Definition: totemsrp.c:1027
struct memb_ring_id ring_id
Definition: totemsrp.c:4
void totemsrp_trans_ack(void *context)
Definition: totemsrp.c:5174
int totemsrp_crypto_reconfigure_phase(void *context, struct totem_config *totem_config, cfg_message_crypto_reconfig_phase_t phase)
Definition: totemsrp.c:5192
unsigned int totemsrp_my_nodeid_get(void *srp_context)
Definition: totemsrp.c:1102
int addr_entries
Definition: totemsrp.c:5
unsigned int backlog
Definition: totemsrp.c:6
#define SEQNO_START_MSG
Definition: totemsrp.c:121
void totemsrp_event_signal(void *srp_context, enum totem_event_type type, int value)
Definition: totemsrp.c:2454
void totemsrp_callback_token_destroy(void *srp_context, void **handle_out)
Definition: totemsrp.c:3496
unsigned int received_flg
Definition: totemsrp.c:3
struct message_item __attribute__
int totemsrp_member_add(void *context, const struct totem_ip_address *member, int iface_no)
Definition: totemsrp.c:5141
int totemsrp_ifaces_get(void *srp_context, unsigned int nodeid, unsigned int *interface_id, struct totem_ip_address *interfaces, unsigned int interfaces_size, char ***status, unsigned int *iface_count)
Definition: totemsrp.c:1050
int totemsrp_reconfigure(void *context, struct totem_config *totem_config)
Definition: totemsrp.c:5183
unsigned int high_delivered
Definition: totemsrp.c:2
struct srp_addr system_from
Definition: totemsrp.c:1
unsigned int proc_list_entries
Definition: totemsrp.c:2
const char * gather_state_from_desc[]
Definition: totemsrp.c:562
int totemsrp_member_remove(void *context, const struct totem_ip_address *member, int iface_no)
Definition: totemsrp.c:5154
int totemsrp_mcast(void *srp_context, struct iovec *iovec, unsigned int iov_len, int guarantee)
Multicast a message.
Definition: totemsrp.c:2463
void main_deliver_fn(void *context, const void *msg, unsigned int msg_len, const struct sockaddr_storage *system_from)
Definition: totemsrp.c:4999
memb_state
Definition: totemsrp.c:277
@ MEMB_STATE_GATHER
Definition: totemsrp.c:279
@ MEMB_STATE_RECOVERY
Definition: totemsrp.c:281
@ MEMB_STATE_COMMIT
Definition: totemsrp.c:280
@ MEMB_STATE_OPERATIONAL
Definition: totemsrp.c:278
Totem Single Ring Protocol.
#define TOTEMPG_STATS_CLEAR_TRANSPORT
Definition: totemstats.h:116
#define TOTEM_TOKEN_STATS_MAX
Definition: totemstats.h:89