11 #include <sys/resource.h>
21 #define STORM_INTERVAL 2 /* NOTE(review): presumably the election-storm detection window in seconds; its use is not visible in this excerpt -- confirm */
34 time_t last_election_loss;
48 static gboolean election_timer_cb(gpointer user_data)
52 crm_info(
"%s timed out, declaring local node as winner", e->name);
87 static guint count = 0;
93 crm_perror(LOG_CRIT,
"Cannot create election");
97 e->uname = strdup(
uname);
98 if (e->uname == NULL) {
99 crm_perror(LOG_CRIT,
"Cannot create election");
108 election_timer_cb, e);
125 if(e &&
uname && e->voted) {
126 crm_trace(
"Discarding %s (no-)vote from lost peer %s", e->name,
uname);
127 g_hash_table_remove(e->voted,
uname);
140 crm_trace(
"Resetting election %s", e->name);
143 crm_trace(
"Destroying voted cache with %d members", g_hash_table_size(e->voted));
144 g_hash_table_destroy(e->voted);
204 crm_err(
"No election defined");
209 crm_uptime(
struct timeval *output)
211 static time_t expires = 0;
212 static struct rusage info;
214 time_t tm_now = time(NULL);
216 if (expires < tm_now) {
219 info.ru_utime.tv_sec = 0;
220 info.ru_utime.tv_usec = 0;
221 rc = getrusage(RUSAGE_SELF, &info);
227 crm_perror(LOG_ERR,
"Could not calculate the current uptime");
232 crm_debug(
"Current CPU usage is: %lds, %ldus", (
long)info.ru_utime.tv_sec,
233 (
long)info.ru_utime.tv_usec);
237 output->tv_sec = info.ru_utime.tv_sec;
238 output->tv_usec = info.ru_utime.tv_usec;
244 crm_compare_age(
struct timeval your_age)
246 struct timeval our_age;
248 crm_uptime(&our_age);
250 if (our_age.tv_sec > your_age.tv_sec) {
251 crm_debug(
"Win: %ld vs %ld (seconds)", (
long)our_age.tv_sec, (
long)your_age.tv_sec);
253 }
else if (our_age.tv_sec < your_age.tv_sec) {
254 crm_debug(
"Lose: %ld vs %ld (seconds)", (
long)our_age.tv_sec, (
long)your_age.tv_sec);
256 }
else if (our_age.tv_usec > your_age.tv_usec) {
257 crm_debug(
"Win: %ld.%06ld vs %ld.%06ld (usec)",
258 (
long)our_age.tv_sec, (
long)our_age.tv_usec, (
long)your_age.tv_sec, (
long)your_age.tv_usec);
260 }
else if (our_age.tv_usec < your_age.tv_usec) {
261 crm_debug(
"Lose: %ld.%06ld vs %ld.%06ld (usec)",
262 (
long)our_age.tv_sec, (
long)our_age.tv_usec, (
long)your_age.tv_sec, (
long)your_age.tv_usec);
286 xmlNode *vote = NULL;
290 crm_trace(
"Election vote requested, but no election available");
296 crm_trace(
"Cannot vote in %s yet: local node not connected to cluster",
316 crm_debug(
"Started %s round %d", e->name, e->count);
317 election_timeout_start(e);
343 crm_trace(
"Election check requested, but no election available");
346 if (e->voted == NULL) {
347 crm_trace(
"%s check requested, but no votes received yet", e->name);
351 voted_size = g_hash_table_size(e->voted);
358 if (voted_size >= num_members) {
361 if (voted_size > num_members) {
362 GHashTableIter gIter;
366 crm_warn(
"Received too many votes in %s", e->name);
368 while (g_hash_table_iter_next(&gIter, NULL, (gpointer *) & node)) {
370 crm_warn(
"* expected vote: %s", node->uname);
374 g_hash_table_iter_init(&gIter, e->voted);
375 while (g_hash_table_iter_next(&gIter, (gpointer *) & key, NULL)) {
381 crm_info(
"%s won by local node", e->name);
382 election_complete(e);
386 crm_debug(
"%s still waiting on %d of %d votes",
387 e->name, num_members - voted_size, num_members);
393 #define LOSS_DAMPEN 2 /* in seconds: compared against the time() delta since the last recorded election loss; a peer's pass arriving within this window is logged as ignored */
399 const char *election_owner;
416 parse_election_message(
election_t *e, xmlNode *message,
struct vote *vote)
418 CRM_CHECK(message && vote,
return FALSE);
420 vote->election_id = -1;
421 vote->age.tv_sec = -1;
422 vote->age.tv_usec = -1;
431 if ((vote->op == NULL) || (vote->from == NULL) || (vote->version == NULL)
432 || (vote->election_owner == NULL) || (vote->election_id < 0)) {
434 crm_warn(
"Invalid %s message from %s in %s ",
435 (vote->op? vote->op :
"election"),
436 (vote->from? vote->from :
"unspecified node"),
437 (e? e->name :
"election"));
447 if ((vote->age.tv_sec < 0) || (vote->age.tv_usec < 0)) {
448 crm_warn(
"Cannot count %s %s from %s because it is missing uptime",
449 (e? e->name :
"election"), vote->op, vote->from);
454 crm_info(
"Cannot process %s message from %s because %s is not a known election op",
455 (e? e->name :
"election"), vote->from, vote->op);
462 crm_info(
"Cannot count %s from %s because no election available",
463 vote->op, vote->from);
471 crm_info(
"Cannot count %s %s from %s because no peer information available",
472 e->name, vote->op, vote->from);
481 char *voter_copy = NULL;
482 char *vote_copy = NULL;
484 CRM_ASSERT(e && vote && vote->from && vote->op);
485 if (e->voted == NULL) {
486 e->voted = crm_str_table_new();
489 voter_copy = strdup(vote->from);
490 vote_copy = strdup(vote->op);
493 g_hash_table_replace(e->voted, voter_copy, vote_copy);
497 send_no_vote(
crm_node_t *peer,
struct vote *vote)
529 int log_level = LOG_INFO;
530 gboolean done = FALSE;
531 gboolean we_lose = FALSE;
532 const char *reason =
"unknown";
533 bool we_are_owner = FALSE;
534 crm_node_t *our_node = NULL, *your_node = NULL;
535 time_t tm_now = time(NULL);
539 if (parse_election_message(e, message, &vote) == FALSE) {
545 we_are_owner = (our_node != NULL)
548 if(can_win == FALSE) {
549 reason =
"Not eligible";
553 reason =
"We are not part of the cluster";
557 }
else if (we_are_owner && (vote.election_id != e->count)) {
559 reason =
"Superseded";
564 reason =
"Peer is not part of our cluster";
565 log_level = LOG_WARNING;
574 crm_warn(
"Cannot count %s round %d %s from %s because we are not election owner (%s)",
575 e->name, vote.election_id, vote.op, vote.from,
576 vote.election_owner);
581 crm_debug(
"Not counting %s round %d %s from %s because no election in progress",
582 e->name, vote.election_id, vote.op, vote.from);
585 record_vote(e, &vote);
591 int age_result = crm_compare_age(vote.age);
594 if (version_result < 0) {
598 }
else if (version_result > 0) {
601 }
else if (age_result < 0) {
605 }
else if (age_result > 0) {
608 }
else if (strcasecmp(e->uname, vote.from) > 0) {
609 reason =
"Host name";
613 reason =
"Host name";
617 if (e->expires < tm_now) {
618 e->election_wins = 0;
621 }
else if (done == FALSE && we_lose == FALSE) {
628 if (e->election_wins > (peers * peers)) {
629 crm_warn(
"%s election storm detected: %d wins in %d seconds",
631 e->election_wins = 0;
633 if (e->wrote_blackbox == FALSE) {
645 e->wrote_blackbox = TRUE;
652 "Processed %s round %d %s (current round %d) from %s (%s)",
653 e->name, vote.election_id, vote.op, e->count, vote.from,
657 }
else if (we_lose == FALSE) {
671 if ((e->last_election_loss == 0)
672 || ((tm_now - e->last_election_loss) > (time_t)
LOSS_DAMPEN)) {
674 do_crm_log(log_level,
"%s round %d (owner node ID %s) pass: %s from %s (%s)",
675 e->name, vote.election_id, vote.election_owner, vote.op,
678 e->last_election_loss = 0;
685 char *loss_time = ctime(&e->last_election_loss);
692 crm_info(
"Ignoring %s round %d (owner node ID %s) pass vs %s because we lost less than %ds ago at %s",
693 e->name, vote.election_id, vote.election_owner, vote.from,
698 e->last_election_loss = tm_now;
700 do_crm_log(log_level,
"%s round %d (owner node ID %s) lost: %s from %s (%s)",
701 e->name, vote.election_id, vote.election_owner, vote.op,
705 send_no_vote(your_node, &vote);
718 e->last_election_loss = 0;