Results 1 to 8 of 8

Thread: Problem with the high availability

  1. #1
    Junior Member
    Join Date
    Sep 2012
    Posts
    15

    Problem with the high availability

    Hello

    I am trying to set up high availability, but without success.

    I have the same shinken-specific files on both servers. Here are the definitions of the satellites from shinken-specific.cfg:
    Code:
    define poller {
     poller_name poller-1
     data_timeout 120
     timeout 3
     address 10.40.90.44
     modules
     port 7771
     manage_sub_realms 0
     max_workers 0
     check_interval 60
     polling_interval 1
     realm All
     max_check_attempts 3
     min_workers 0
     processes_by_worker 256
    }
    
    define reactionner {
     data_timeout 120
     timeout 3
     address 10.40.90.44
     modules
     reactionner_name reactionner-1
     manage_sub_realms 0
     max_workers 15
     spare 0
     check_interval 60
     polling_interval 1
     realm All
     max_check_attempts 3
     min_workers 1
     port 7769
    }
    
    define broker {
     broker_name broker-1
     data_timeout 120
     timeout 3
     modules Livestatus, Simple-log, WebUI, NPCDMOD
     manage_arbiters 1
     manage_sub_realms 1
     spare 0
     check_interval 60
     address 10.40.90.44
     realm All
     max_check_attempts 3
     port 7772
    }
    
    define arbiter {
     modules ,PickleRetentionArbiter, CommandFile
     spare 0
     address 10.40.90.44
     port 7770
     arbiter_name Arbiter-Master
    }
    
    define scheduler {
     data_timeout 120
     timeout 3
     weight 1
     skip_initial_broks 0
     modules ,PickleRetention
     spare 0
    check_interval 60
     address 10.40.90.44
     scheduler_name scheduler-1
     max_check_attempts 3
     realm All
     port 7768
    }
    
    define receiver {
     modules NSCA
     data_timeout 120
     timeout 3
     direct_routing 0
     port 7773
     spare 0
     check_interval 60
     address 10.40.90.44
     realm All
     max_check_attempts 3
     receiver_name receiver-1
    }
    
    define scheduler{
        scheduler_name  scheduler-spare
        address     10.40.90.45
        port       7768
        spare      1
        modules ,PickleRetention
        realm      All
        }
    
    define poller{
        poller_name   poller-spare
        address     10.40.90.45
        port      7771
        spare      1
        modules
        realm      All
    }
    
    define reactionner{
        reactionner_name reactionner-spare
        address     10.40.90.45
        port       7769
        spare      1
       modules
        realm      All
       }
    
    define receiver{
        modules NSCA
        receiver_name  receiver-spare
        address     10.40.90.45
        port       7773
        spare      1
        realm      All
    }
    
    define broker{
        broker_name   broker-spare
        address     10.40.90.45
        port      7772
        spare      1
        modules Livestatus, Simple-log, WebUI, NPCDMOD
        realm      All
    }
    
    define arbiter{
        arbiter_name  arbiter-spare
        address     10.40.90.45
        host_name    shinken-02
        port      7770
        spare      1
        modules ,PickleRetentionArbiter, CommandFile
        realm      All
    }
    Shinken-01 -> 10.40.90.44 = Master
    Shinken-02 -> 10.40.90.45 = Spare

    When I restart Shinken on the Shinken-01 server I don't get any error, but on the Shinken-02 server I get the following error:
    Code:
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/xen-h1/xen-h1.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/prodsys03/prodsys03.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/redmine-01/redmine-01.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/so-sparc-v8z3/so-sparc-v8z3.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/prodsys04/prodsys04.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/xen-h10/xen-h10.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/fb-i386-v62/fb-i386-v62.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/vdftest-win2003/vdftest-win2003.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/xen-h2/xen-h2.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/solaris-zone1/solaris-zone1.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/osx-x86-64-v107/osx-x86-64-v107.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/kvm-h12/kvm-h12.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/objects/discovery/ga-collab-01/ga-collab-01.cfg'
    [1349092215] Info :  Processing object config file '/usr/local/shinken/etc/resource.cfg'
    [1349092215] Info :  [config] opening '/usr/local/shinken/etc/shinken-specific.cfg' configuration file
    [1349092215] Debug :  Opening local log file
    [1349092215] Info :  And arbiter is launched with the hostname:shinken-02 from an arbiter point of view of addr:shinken-02
    [1349092215] Info :  I am the spare Arbiter: Arbiter-Master
    [1349092215] Info :  And arbiter is launched with the hostname:shinken-02 from an arbiter point of view of addr:shinken-02
    [1349092215] Info :  I am the master Arbiter: arbiter-spare
    [1349092215] Info :  My own modules: PickleRetentionArbiter,CommandFile
    [1349092215] Info :  Loading the Glpi broker module
    [1349092215] Debug :  Get a pickle retention generic module for plugin PickleRetentionArbiter
    [1349092215] Info :  Get a Named pipe module for plugin CommandFile
    [1349092215] Info :  Trying to init module: PickleRetentionArbiter
    [1349092215] Info :  I correctly loaded the modules: [PickleRetentionArbiter,CommandFile]
    [1349092216] Warning : [items] service.EventLogSystem is already defined
    [1349092216] Warning : [items] service.Mysql-slow-queries is already defined
    [1349092216] Warning : [items] service.Disks is already defined
    [1349092216] Warning : [items] service.EventLogSystem is already defined
    [1349092216] Warning : [items] service.EventLogSystem is already defined
    [1349092216] Warning : [items] service.Mysql-slow-queries is already defined
    [1349092216] Warning : [items] service.Mysql-slow-queries is already defined
    [1349092216] Warning : [items] service.Mysql-slow-queries is already defined
    [1349092216] Warning : [items] service.Mysql-slow-queries is already defined
    [1349092216] Warning : [items] service.Mysql-slow-queries is already defined
    [1349092216] Warning : [items] service.Mysql-slow-queries is already defined
    [1349092216] Info :  All: (in/potential) (schedulers:2) (pollers:1/2) (reactionners:1/2) (brokers:1/2) (receivers:1/2)
    [1349092216] Info :  Running pre-flight check on configuration data...
    [1349092216] Info :  Checking global parameters...
    [1349092216] Info :  Checking hosts...
    [1349092216] Warning : [host::xen-h6] the template '' defined for 'xen-h6' is unknown
    [1349092216] Info :   Checked 117 hosts
    [1349092216] Info :  Checking hostgroups...
    [1349092216] Info :   Checked 11 hostgroups
    [1349092216] Info :  Checking contacts...
    [1349092216] Info :   Checked 4 contacts
    [1349092216] Info :  Checking contactgroups...
    [1349092216] Info :   Checked 3 contactgroups
    [1349092216] Info :  Checking notificationways...
    [1349092216] Info :   Checked 2 notificationways
    [1349092216] Info :  Checking escalations...
    [1349092216] Info :   Checked 1 escalations
    [1349092216] Info :  Checking services...
    [1349092216] Info :   Checked 894 services
    [1349092216] Info :  Checking servicegroups...
    [1349092216] Info :   Checked 0 servicegroups
    [1349092216] Info :  Checking timeperiods...
    [1349092216] Info :   Checked 5 timeperiods
    [1349092216] Info :  Checking commands...
    [1349092216] Info :   Checked 268 commands
    [1349092216] Info :  Checking hostsextinfo...
    [1349092216] Info :   Checked 0 hostsextinfo
    [1349092216] Info :  Checking servicesextinfo...
    [1349092216] Info :   Checked 0 servicesextinfo
    [1349092216] Info :  Checking servicedependencies...
    [1349092216] Info :   Checked 0 servicedependencies
    [1349092216] Info :  Checking hostdependencies...
    [1349092216] Info :   Checked 0 hostdependencies
    [1349092216] Info :  Checking arbiters...
    [1349092216] Info :   Checked 2 arbiters
    [1349092216] Info :  Checking schedulers...
    [1349092216] Info :   Checked 2 schedulers
    [1349092216] Info :  Checking reactionners...
    [1349092216] Info :   Checked 2 reactionners
    [1349092216] Info :  Checking pollers...
    [1349092216] Info :   Checked 2 pollers
    [1349092216] Info :  Checking brokers...
    [1349092216] Info :   Checked 2 brokers
    [1349092216] Info :  Checking receivers...
    [1349092216] Info :   Checked 2 receivers
    [1349092216] Info :  Checking resultmodulations...
    [1349092216] Info :   Checked 0 resultmodulations
    [1349092216] Info :  Checking discoveryrules...
    [1349092216] Info :   Checked 48 discoveryrules
    [1349092216] Info :  Checking discoveryruns...
    [1349092216] Info :   Checked 2 discoveryruns
    [1349092216] Info :  Checking businessimpactmodulations...
    [1349092216] Info :   Checked 0 businessimpactmodulations
    [1349092216] Info :  Cutting the hosts and services into parts
    [1349092216] Info :  Creating packs for realms
    [1349092216] Info :  Number of hosts in the realm All: 117 (distributed in 60 linked packs)
    [1349092216] Debug :  Trying to open the distribution file pack_distribution.dat
    [1349092216] Info :  Saving the distribution file pack_distribution.dat
    [1349092216] Info :  Number of hosts in all the realm 117
    [1349092216] Info :  Number of hosts 117
    [1349092216] Info :  Things look okay - No serious problems were detected during the pre-flight check
    [1349092216] Info :  [Arbiter] Serializing the configurations...
    [1349092216] Debug :  [All] Serializing the configuration 0
    [1349092216] Debug :  [config] time to serialize the conf All:0 is 0.220477819443
    [1349092217] Debug :  [config] time to serialize the global conf : 0.274508953094
    [1349092217] Info :  Configuration Loaded
    [1349092217] Info :  Trying to initialize additonnal groups for the daemon
    [1349092217] Warning : Stale pidfile exists (no or invalid or unreadable content). Reusing it.
    [1349092217] Info :  Initializing Pyro connection with host:10.40.90.45 port:7770 ssl:False
    [1349092217] Critical : I got an unrecoverable error. I have to exit
    [1349092217] Critical : You can log a bug ticket at https://github.com/naparuba/shinken/issues/new to get help
    [1349092217] Critical : Exception trace follows: Traceback (most recent call last):
     File "/usr/local/shinken/shinken/daemons/arbiterdaemon.py", line 485, in main
      self.do_daemon_init_and_start()
     File "/usr/local/shinken/shinken/daemon.py", line 479, in do_daemon_init_and_start
      self.setup_pyro_daemon()
     File "/usr/local/shinken/shinken/daemon.py", line 543, in setup_pyro_daemon
      self.pyro_daemon = pyro.ShinkenPyroDaemon(self.host, self.port, ssl_conf.use_ssl)
     File "/usr/local/shinken/shinken/pyro_wrapper.py", line 90, in __init__
      raise PortNotFree(msg)
    PortNotFree: Error: Sorry, the port 7770 is not free: Couldn't start Pyro daemon: [Errno 98] Address already in use
    
    Traceback (most recent call last):
     File "/usr/local/shinken/bin/shinken-arbiter", line 107, in <module>
      daemon.main()
     File "/usr/local/shinken/shinken/daemons/arbiterdaemon.py", line 485, in main
      self.do_daemon_init_and_start()
     File "/usr/local/shinken/shinken/daemon.py", line 479, in do_daemon_init_and_start
      self.setup_pyro_daemon()
     File "/usr/local/shinken/shinken/daemon.py", line 543, in setup_pyro_daemon
      self.pyro_daemon = pyro.ShinkenPyroDaemon(self.host, self.port, ssl_conf.use_ssl)
     File "/usr/local/shinken/shinken/pyro_wrapper.py", line 90, in __init__
      raise PortNotFree(msg)
    shinken.pyro_wrapper.PortNotFree: Error: Sorry, the port 7770 is not free: Couldn't start Pyro daemon: [Errno 98] Address already in use
    The netstat command on Shinken-01 gives the following output:
    Code:
    netstat -plan |grep 777
    tcp    0   0 10.40.90.44:7770    0.0.0.0:*        LISTEN   17880/python  
    tcp    0   0 0.0.0.0:7771      0.0.0.0:*        LISTEN   17604/python  
    tcp    0   0 0.0.0.0:7772      0.0.0.0:*        LISTEN   17731/python  
    tcp    0   0 0.0.0.0:7773      0.0.0.0:*        LISTEN   17777/python  
    tcp    0   0 10.40.90.44:7771    10.40.90.44:33112    ESTABLISHED 17604/python  
    tcp    1   0 10.40.90.44:52048    10.40.90.45:7771    CLOSE_WAIT 17880/python  
    tcp    0   0 10.40.90.44:7772    10.40.90.44:44794    ESTABLISHED 17731/python  
    tcp    1   0 10.40.90.44:60704    10.40.90.45:7772    CLOSE_WAIT 17880/python  
    tcp    0   0 10.40.90.44:7773    10.40.90.44:58771    ESTABLISHED 17777/python  
    tcp    0   0 10.40.90.44:51129    10.40.90.45:7770    ESTABLISHED 17880/python  
    tcp    0   0 10.40.90.44:33112    10.40.90.44:7771    ESTABLISHED 17880/python  
    tcp    0   0 10.40.90.44:58771    10.40.90.44:7773    ESTABLISHED 17880/python  
    tcp    1   0 10.40.90.44:33313    10.40.90.45:7773    CLOSE_WAIT 17880/python  
    tcp    0   0 10.40.90.44:44794    10.40.90.44:7772    ESTABLISHED 17880/python
    I don't know what is wrong here.
    Can somebody help me?

  2. #2
    Shinken project leader
    Join Date
    May 2011
    Location
    Bordeaux (France)
    Posts
    2,131

    Re: Problem with the high availability

    I don't see the hostaddress parameter for the first arbiter.
    No direct support by personal message. Please open a thread so everyone can see the solution

  3. #3
    Junior Member
    Join Date
    Sep 2012
    Posts
    15

    Re: Problem with the high availability

    Do you mean the hostname?

  4. #4
    Junior Member
    Join Date
    Sep 2012
    Posts
    15

    Re: Problem with the high availability

    I have added the hostname to the first arbiter, but I still get the same error :'(

  5. #5
    Junior Member
    Join Date
    Sep 2012
    Posts
    15

    Re: Problem with the high availability

    Could it be that it is normal for this error to occur?

    Because when I go to the System Overview in the Shinken WebUI, I can see the master scheduler and the spare.


    But where is the arbiter?

  6. #6
    Shinken project leader
    Join Date
    May 2011
    Location
    Bordeaux (France)
    Posts
    2,131

    Re: Problem with the high availability

    Then look at which process is taking the port, kill all Python processes and restart it. Some process is keeping the port; we must find which one.
    No direct support by personal message. Please open a thread so everyone can see the solution

  7. #7
    Junior Member
    Join Date
    Sep 2012
    Posts
    15

    Re: Problem with the high availability

    Thank you it works ;D

    But shouldn't I see the Arbiter on the Shinken WebUI?

  8. #8
    Shinken project leader
    Join Date
    May 2011
    Location
    Bordeaux (France)
    Posts
    2,131

    Re: Problem with the high availability

    Not the spare one, it's a missing feature currently.
    No direct support by personal message. Please open a thread so everyone can see the solution

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •