Page 1 of 2 12 LastLast
Results 1 to 10 of 11

Thread: Escalation problem

  1. #1
    Junior Member
    Join Date
    Feb 2012
    Posts
    6

    Escalation problem

    Hello,

    I'm trying to use escalation for my monitoring, but I've got a little problem: when the escalation is triggered, Shinken sends an email every second!

    I've searched for a while and simplified my configuration, but I still run into this problem.

    I don't know if I'm missing something in my configuration or if it's a bug, and I have no clue how to debug it.

    Some conf parts :
    Code:
    define escalation {
    	escalation_name		EscaladeWDM
    	contacts		wdmedia
    	first_notification_time 5
    	last_notification_time	600
    	notification_interval	60
    	escalation_period	workhours
    	escalation_options	w,u,c,r
    }
    
    define contact{
    	use					generic-contact
        contact_name              wdmedia
    	email					noiwontputithere@norhere.net
    	host_notification_period		workhours
    	service_notification_period		workhours
    }
    
    define service {
    	use			generic-load
    	host_name		ns212127
        escalations      EscaladeWDM
    }
    
    define service{
        name              generic-load
    	use				generic-service
    	service_description       LOAD
    	normal_check_interval      3
    	retry_check_interval      1
    	max_check_attempts       3
    	notification_interval      60
    	check_command          check_nrpe_load
        register            0    ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
        }
    
    define service{
        name              generic-service 	; The 'name' of this service template
        active_checks_enabled      1    		; Active service checks are enabled
        passive_checks_enabled     1  		  	; Passive service checks are enabled/accepted
    	check_interval      5			; Check the service every 5 minutes in normal state
        retry_interval      1			; Re-check the service every one minutes until a hard state can be determined
        max_check_attempts    2			; Re-check the service up to 3 times in order to determine its final (hard) state
        check_period       24x7			; The service can be checked at any time of the day
        notifications_enabled      1    		; Service notifications are enabled
    	notification_options		w,u,c,r			; Send notifications about warning, unknown, critical, and recovery events
        notification_interval      60			; Re-notify about service problems every hour
    	notification_period       24x7
        flap_detection_enabled     1    		; Flap detection is enabled
    	check_freshness         0
    	freshness_threshold		3600
        obsess_over_service       0
        process_perf_data        1    		; Process perf data, like for PNP
    	is_volatile           0    		; for log monitoring. See doc for more info about it
        register            0
        }
    
    define host{
    	name				generic-host
    	alias				generic-host
    	max_check_attempts		2
    	check_interval			5
    	active_checks_enabled		1
    	check_period			24x7
    	contact_groups			admins
    	notification_interval		60
    	notification_period		24x7
    	notification_options		d,u,r,f
    	notifications_enabled		1
    	event_handler_enabled		0
    	flap_detection_enabled		1
    	process_perf_data		1
    	register			0
    }
    
    define contact{
        name              generic-contact  	; The name of this contact template
        service_notification_period   24x7			; service notifications can be sent anytime
        host_notification_period    24x7			; host notifications can be sent anytime
        service_notification_options  c,w,r
        host_notification_options    d,u,r,f,s		; send notifications for all host states, flapping events, and scheduled downtime events
        service_notification_commands  notify-service-by-email	; send service notifications via email
        host_notification_commands   notify-host-by-email	; send host notifications via email
        register            0    		; DONT REGISTER THIS DEFINITION - ITS NOT A REAL CONTACT, JUST A TEMPLATE!
    	host_notifications_enabled	1
    	service_notifications_enabled	1
    	host_notification_period		24x7
    	service_notification_period		24x7
    	email					nothere@nothere@net
    	}
    And some log :

    2012-02-25 19:31:58,278 [1330194718] SERVICE NOTIFICATION: admin;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 3.13, 3.04, 3.19

    2012-02-25 19:36:57,750 [1330195017] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:36:58,753 [1330195018] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:36:59,756 [1330195019] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:37:00,776 [1330195020] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:37:01,796 [1330195021] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:37:02,816 [1330195022] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:37:03,853 [1330195023] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05


    2012-02-25 19:37:05,943 [1330195025] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:37:06,996 [1330195026] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:37:08,068 [1330195028] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:37:09,138 [1330195029] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:37:10,227 [1330195030] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    2012-02-25 19:37:11,315 [1330195031] SERVICE NOTIFICATION: wdmedia;ns212127;LOAD;WARNING;notify-service-by-email;WARNING - Charge moyenne: 2.57, 2.77, 3.05

    PS: sorry for my poor French-based English

    Regards,
    ManicoW

  2. #2
    Shinken project leader
    Join Date
    May 2011
    Location
    Bordeaux (France)
    Posts
    2,130

    Re: Escalation problem

    Hi,

    can you try with the 1.0rc1 version?

    If I'm not wrong, this one sounds like an old bug.
    No direct support by personal message. Please open a thread so everyone can see the solution

  3. #3
    Junior Member
    Join Date
    Feb 2012
    Posts
    6

    Re: Escalation problem

    Hi,

    OK, I'll try it as soon as I have a little time, and I'll keep you informed.

    Regards

  4. #4
    Shinken project leader
    Join Date
    May 2011
    Location
    Bordeaux (France)
    Posts
    2,130

    Re: Escalation problem

    Oki
    No direct support by personal message. Please open a thread so everyone can see the solution

  5. #5
    Junior Member
    Join Date
    Feb 2012
    Posts
    6

    Re: Escalation problem

    Du coup je continue en français, vu qu'on le parle tous les deux, au moins

    J'ai donc testé avec la 1.0rc1, et j'ai eu le même résultat.

    J'ai essayé de regarder un peu le code (je suis loin d'être un expert python, mon niveau approcherait plutôt le bricolage occasionnel), pour voir si un élément de configuration ou autre manquait, mais du coup en creusant je n'ai pas trouvé la logique pour l'intervalle des notifications sur les escalades basées sur le temps.

    En effet, lors du get_next_notif_time au niveau des escalades, il n'y a pas de notion de notification_interval ; le seul test que je vois, c'est de vérifier s'il n'y a pas une escalade qui fait qu'une notification devrait partir plus tôt (ce qui est le cas là en effet), mais rien qui ne repousse la prochaine notification escaladée à notification_interval (ou alors j'ai mal compris les mécanismes, c'est tout à fait possible).

    Bref, j'avoue qu'un éclairage là-dessus me serait bien utile (d'autant plus qu'en recherchant dans mes archives, j'avais bien rencontré le même problème sur un serveur de monitoring qui n'a rien à voir, en version 0.8.1 de shinken, et je n'ai pas trouvé beaucoup de changements à ce sujet dans le code entre la 0.8.1 et la 1.0rc1).

    Merci d'avance

  6. #6
    Junior Member
    Join Date
    Feb 2012
    Posts
    6

    Re: Escalation problem

    Bon, je confirme que ça ressemble à un bug, je viens d'essayer sur une machine fraîche, avec un shinken 1.0 installé via le install -i , j'ai juste gardé le service load pour le local, créé une escalade de la manière suivante :

    define escalation {
    escalation_name To_wdmedia
    first_notification_time 5
    last_notification_time 15
    notification_interval 3
    contacts wdmedia
    }
    define service{
    service_description LocalLoad
    use local-service ; Name of service template to use
    host_name localhost
    check_command check_local_load
    escalations To_wdmedia
    }

    et hop, même problème : au bout de 5 minutes l'escalade démarre bien mais il envoie un mail par seconde.

    Je crois que je vais faire un petit rapport de bug

  7. #7
    Shinken project leader
    Join Date
    May 2011
    Location
    Bordeaux (France)
    Posts
    2,130

    Re: Escalation problem

    Ou la pas bon en effet. Ouvre le ticket avec l'exemple et je tente de tracer ça

    English: It seems to be a real bug in 1.0 for escalations (a notification is sent every second). We are hunting it.
    No direct support by personal message. Please open a thread so everyone can see the solution

  8. #8
    Shinken project leader
    Join Date
    May 2011
    Location
    Bordeaux (France)
    Posts
    2,130

    Re: Escalation problem

    I think I've spotted the bug. I'm finishing writing the test case, and then I'll commit.
    No direct support by personal message. Please open a thread so everyone can see the solution

  9. #9
    Shinken project leader
    Join Date
    May 2011
    Location
    Bordeaux (France)
    Posts
    2,130

    Re: Escalation problem

    Ok should be fixed with this commit : https://github.com/naparuba/shinken/...62b16b0eaf1e02

    You can give it a try by applying it, or wait for the 1.0.1 version, due next week I think.
    No direct support by personal message. Please open a thread so everyone can see the solution

  10. #10
    Junior Member
    Join Date
    Feb 2012
    Posts
    6

    Re: Escalation problem

    Hurra ! Oups :/

    No more notification flood with this patch, and the first escalation notification is sent right on time, but... the notification_interval of the escalation is not used. I've tried specifying an interval of 3 for the escalation and 60 for the normal service, and only the normal service notification interval is used.

    I'll try setting my last_notification_time to 180, in order to see whether the escalation contact is notified at the normal notification interval; I'll post here whether it works or not.


Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •