~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Open Mash Cross Reference
mash/tcl/hm/agent-hm.tcl

Component: ~ [ mash ] ~ [ apps ] ~ [ gsm ] ~ [ lib ] ~ [ otcl ] ~ [ srm ] ~ [ tcl8.3 ] ~ [ tclcl ] ~ [ tk8.3 ] ~ [ tutorials ] ~

  1 # agent-hm.tcl --
  2 #
  3 #     This file implements a Host Manager Agent, which is responsible
  4 #     for launching servents and managing resources in the AS1 
  5 #     framework.
  6 #
  7 # Copyright (c) 1997-2002 The Regents of the University of California.
  8 # All rights reserved.
  9 #
 10 # Redistribution and use in source and binary forms, with or without
 11 # modification, are permitted provided that the following conditions are met:
 12 #
 13 # A. Redistributions of source code must retain the above copyright notice,
 14 #    this list of conditions and the following disclaimer.
 15 # B. Redistributions in binary form must reproduce the above copyright notice,
 16 #    this list of conditions and the following disclaimer in the documentation
 17 #    and/or other materials provided with the distribution.
 18 # C. Neither the names of the copyright holders nor the names of its
 19 #    contributors may be used to endorse or promote products derived from this
 20 #    software without specific prior written permission.
 21 #
 22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
 23 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 24 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 25 # ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
 26 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 27 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 28 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 29 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 30 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 32 #
 33 # @(#) $Header: /usr/mash/src/repository/mash/mash-1/tcl/hm/agent-hm.tcl,v 1.65 2002/02/26 20:25:23 weitsang Exp $ (UCB)
 34 
 35 
 36 import Timer AnnounceListenManager/AS/HM 
 37 import ServiceCreator ServiceCreator/MeGa
 38 import CoordinationBus MeGa AnnounceListenManager/AS/Platform
 39 
 40 #-----------------------------------------------------------------------
 41 # Class: 
 42 #   HMAgent
 43 #
 44 # Description:
 45 #   Host Manager Agent, part of the AS1 framework
 46 #
 47 # Members:
 48 #   maxports_ --
 49 #     Size of the port range handed out to services (uniqport uses it as a modulus).
 50 #
 51 #   minport_ --
 52 #     Minimum port number to assign to services.
 53 # 
 54 #   uniqport_ --
 55 #     Last unique port number assigned to a service.
 56 # 
 57 #   portmap_ --
 58 #     This array maintains used port number so that we can
 59 #     generate an unused/unique port number.
 60 # 
 61 #   uniqid_ --
 62 #     A unique id used to create temporary filename for downloaded
 63 #     servlets. Initialized to 0 and is incremented by 1 everytime
 64 #     a temp file is created.
 65 # 
 66 #   app_ --
 67 #     The HM Application.
 68 #
 69 #   cbchannel_ --
 70 #     Channel ID for coordination bus.
 71 #
 72 #   logfd_ --
 73 #     File descriptor for a log file.  The log file is created 
 74 #     by the application.
 75 #
 76 #   creators_ -- 
 77 #     An array of ServiceCreator object for various services.
 78 #     FIXME: we should not hardcode available services in hm.
 79 #     It should be discovered dynamically.
 80 # 
 81 #   al_ --
 82 #     An array of announce/listen manager.
 83 #
 84 #   scriptfiles_ --
 85 #     An array that caches service location to filenames of previously 
 86 #     retrieved scripts.
 87 #
 88 #   scripturls_ --
 89 #     Predefined URL prefix for downloading scripts of type "urn:"
 90 #
 91 #   tid_ --
 92 #     An array of timers indexed by service instance.
 93 #
 94 #   launched_ --
 95 #     An array of services launched, indexed by service instance.
 96 #
 97 #   hmhosts_ --
 98 #     A list of other hosts eligible to run hm.
 99 #-----------------------------------------------------------------------
100 Class HMAgent -superclass Timer
101 
HMAgent instproc init { app logfd } {

    $self next

    $self instvar maxports_ minport_ uniqport_ uniqid_
    $self instvar app_ cbchannel_ logfd_ creators_ al_

    # One ServiceCreator per known service type.
    # FIXME: the list of available services is hardcoded here; it
    # should be discovered dynamically.
    foreach service [list MeGa Generic Mars MediaPad Aries Device \
                          FXTemp FXForwardBackEnd FXForwardFrontEnd] {
        set creators_($service) [new ServiceCreator/$service $self]
    }

    set cbchannel_ 3
    set app_       $app
    set logfd_     $logfd
    set uniqid_    0
    set maxports_  [$self get_option maxPorts]
    set minport_   [$self get_option minPort]
    set uniqport_  $minport_

    $self log "Start"

    $self init_scripturls

    # Optional configuration file describing how this hm is chained.
    set conffile [$app get_option megaConfFile]
    if { [file exists $conffile] } {
        $self log "Reading config file $conffile."
        $self parse_conffile $conffile
    }

    set megaspec [$self get_option megaCtrl]
    set bw       [$self get_option megaCtrlBW]

    # FIXME I'm commenting this out since there is some *really*
    # strange bug in that the spawning rsh code does not work -- i.e., the
    # rsh doesn't return if we have one too many network channels open.
    # This may be a bug in tcl/otcl, but for now, we just comment this out.

    set al_(generic) [new AnnounceListenManager/AS/HM $self $megaspec $bw]

    # One announce/listen manager per control channel; "hm" is created
    # last.  FIXME Should be subclassed.
    foreach m { audio video sdp mb hm } {
        set spec [MeGa ctrlchan $m $megaspec]
        set al_($m) [new AnnounceListenManager/AS/HM $self $spec $bw]
    }

    # hm's always announce, even if there's only one of them, so the
    # hm channel is started in every mode.
    set balance [$self get_option loadBalance]
    set target  [$self get_option targetNum]
    $al_(hm) start

    if { $balance != "" } {
        # Load-balancing mode: periodic load checks.
        $self read_hmhosts
        $self init_load_check
    } elseif { $target != "" } {
        # Target-count mode: the Timer callback drives target_check.
        $self set trgtnum_ $target
        if { [$self get_option glunix] == "" } {
            $self read_hmhosts
        }
        HMAgent instproc timeout {} { $self target_check }
        $self randomize yes
        set factor [$self get_option checkFactor]
        $self msched [expr {$factor * [$self get_option checkInterval]}]
    }

    # Added by Angie, announce our contact addr on a standard global
    # channel.  Default is no.
    if { [$self get_option allow_distrib] == "yes" } {
        set gspec [$self get_option glob_chan]
        set al_(platform) \
            [new AnnounceListenManager/AS/Platform $self $gspec $bw $al_(hm)]
        $al_(platform) start
    }
    # end
}
184 
185 
#-----------------------------------------------------------------------
# Method:
#   HMAgent init_scripturls
# Description:
#   Fill scripturls_ with the built-in list of URL prefixes.  get_script
#   walks this list, in order, when it resolves a "urn:" location.
#-----------------------------------------------------------------------
HMAgent instproc init_scripturls {} {
    $self set scripturls_ [list \
        http://www-mash.cs.berkeley.edu/dist/as/scripts \
        http://www.cs.berkeley.edu/~elan/as/scripts]
}
200 
201 
#-----------------------------------------------------------------------
# Method:
#   HMAgent parse_conffile
# Description:
#   Read the configuration file.  The configuration file specifies how
#   this hm is connected to the other hms. (Host manager can be chained)
#   Recognized lines (first word is the keyword):
#     link  <gwctrl> <clientctrl>   -- also turns the "link" option on
#     leaf  <gwctrl>
#     media <type> <bw> <ofmt>
#   Blank lines, lines starting with "#", and unknown keywords are
#   ignored.  Results are stored in the conf_ array.
# Arguments:
#   f -- filename of the configuration file.
#-----------------------------------------------------------------------
HMAgent instproc parse_conffile { f } {
    # "open" reports failure by raising a Tcl error, never by returning
    # a negative value, so the failure has to be caught.
    if { [catch {open $f r} fd] } {
        $self log "can't open config file $f: $fd"
        return
    }
    $self instvar conf_

    # gets returns -1 at end of file and 0 for an empty line; testing
    # ">= 0" keeps a blank line from ending the parse early.
    while { [gets $fd line] >= 0 } {
        set line [string trim $line]
        if { $line == "" || [string index $line 0] == "#" } {
            continue
        }
        # The line is treated as a Tcl list; skip it if it is malformed
        # (e.g. unbalanced braces) instead of aborting the whole parse.
        if { [catch {lindex $line 0} kw] } {
            $self log "ignoring malformed config line: $line"
            continue
        }
        switch -- $kw {
            link {
                $self add_option link yes
                set conf_(gwctrl) [lindex $line 1]
                set conf_(clientctrl) [lindex $line 2]
            }
            leaf {
                set conf_(gwctrl) [lindex $line 1]
            }
            media {
                set mtype [lindex $line 1]
                set conf_($mtype,bw) [lindex $line 2]
                set conf_($mtype,ofmt) [lindex $line 3]
            }
        }
    }
    close $fd
}
236 
237 
#-----------------------------------------------------------------------
# Method:
#   HMAgent target_check
# Description:
#   Compare the number of running hm's (peers reported by the hm
#   announce/listen manager plus this one) with the target trgtnum_.
#   Below target: spawn with probability min(1, N/n - 1).
#   Above target: exit with probability (n - N)/n.
#   Unless this process exits, the check is rescheduled after
#   checkInterval.
#-----------------------------------------------------------------------
HMAgent instproc target_check {} {
    $self instvar al_ trgtnum_

    # Peers seen on the hm channel, plus ourselves.
    set total [expr {[$al_(hm) hmnum] + 1}]

    # Uniform draw, scaled by 0x7fffffff.
    set dice [expr {[random] / double(0x7fffffff)}]

    if { $total < $trgtnum_ } {
        # prob may exceed 1; that is fine, it just means "always spawn".
        set prob [expr {double($trgtnum_ - $total) / $total}]
        if { $dice < $prob } {
            $self spawn
        }
    } elseif { $total > $trgtnum_ } {
        set prob [expr {double($total - $trgtnum_) / $total}]
        if { $dice < $prob } {
            $self doexit
            return
        }
    }

    # want the max here probably...
    #   set t [$al_(hm) get_timer]
    #   $self msched [$t set interval_]
    $self msched [$self get_option checkInterval]
}
271 
272 
#-----------------------------------------------------------------------
# Method:
#   HMAgent doexit
# Description:
#   Announce our death on the hm channel, then terminate the process
#   with exit status 0.
#-----------------------------------------------------------------------
HMAgent instproc doexit {} {
    set hm_al [$self set al_(hm)]
    $hm_al announce_death
    exit 0
}
285 
286 
#-----------------------------------------------------------------------
# Method:
#   HMAgent log
# Description:
#   Write one line to logfd_ and flush it.  A non-empty message is
#   prefixed with "[<pid>]" and elements 1..3 of [gettimeofday ascii];
#   an empty message produces an empty line.  Nothing is written when
#   the noLog option is set.
# Arguments:
#   msg -- message to be printed.
#-----------------------------------------------------------------------
HMAgent instproc log {msg} {
    if { [$self get_option noLog] != "" } {
        return
    }
    $self instvar logfd_

    set line ""
    if { $msg != "" } {
        set who [$self pid]
        set stamp [lrange [gettimeofday ascii] 1 3]
        set line "\[$who\] $stamp $msg"
    }
    puts $logfd_ $line
    flush $logfd_
}
308 
309 
#-----------------------------------------------------------------------
# Method:
#   HMAgent pid
# Description:
#   Thin wrapper around Tcl's "pid" command: returns the id of the
#   current process.
#-----------------------------------------------------------------------
HMAgent instproc pid {} {
    set me [pid]
    return $me
}
319 
320 
#-----------------------------------------------------------------------
# Method:
#   HMAgent destroy
# Description:
#   Delete every announce/listen manager stored in array al_ (including
#   the "generic" and optional "platform" entries created by init) and
#   then chain to the superclass destructor.
#-----------------------------------------------------------------------
HMAgent instproc destroy {} {
    $self instvar al_

    # al_ may be missing or only partly filled if init did not finish,
    # so delete whatever is actually there instead of a fixed key list.
    if { [info exists al_] } {
        # The platform manager was constructed with a reference to
        # al_(hm); delete it before the manager it refers to.
        if { [info exists al_(platform)] } {
            delete $al_(platform)
            unset al_(platform)
        }
        foreach m [array names al_] {
            delete $al_($m)
        }
        unset al_
    }

    $self next
}
335 
336 
#-----------------------------------------------------------------------
# Method:
#   HMAgent uniqport
# Description:
#   Advance uniqport_ by 4 within the range
#   [minport_, minport_ + maxports_) and return it, skipping any port
#   that has an entry in portmap_.
#   FIXME: nothing in this method records the returned port in
#   portmap_, so after the range wraps around a port can be handed out
#   again.  Recording it here would need a matching release path.
#-----------------------------------------------------------------------
HMAgent instproc uniqport {} {
    # Do this for now...
    $self instvar uniqport_ portmap_ maxports_ minport_

    set tries 0
    while { 1 } {
        # Wrap the *offset* from minport_, not the absolute port number:
        # uniqport_ already includes minport_, so taking the modulus of
        # the absolute value and adding minport_ again lands outside the
        # range unless minport_ happens to be a multiple of maxports_.
        set offset [expr {($uniqport_ - $minport_ + 4) % $maxports_}]
        set uniqport_ [expr {$minport_ + $offset}]

        if { ![info exists portmap_($uniqport_)] } {
            break
        }
        # Bound the search so a fully marked portmap_ cannot hang us.
        if { [incr tries] >= $maxports_ } {
            $self log "uniqport: all ports are marked used, reusing $uniqport_"
            break
        }
    }
    return $uniqport_
}
358 
359 
#-----------------------------------------------------------------------
# Method:
#   HMAgent pick_mcastaddr
# Description:
#   Return a random multicast address of the form 224.3.X.Y, where X
#   and Y are each drawn from 2..251.
#-----------------------------------------------------------------------
HMAgent public pick_mcastaddr {} {
    # FIXME
    set octets {}
    foreach slot { third fourth } {
        lappend octets [expr {([random] % 250) + 2}]
    }
    return 224.3.[join $octets .]
}
373 
374 
#-----------------------------------------------------------------------
# Method:
#   HMAgent launch
# Description:
#   Launch a service.  A service is launched only if (1) the number of
#   current pending launches is less than the maximum allowable pending
#   launches (configurable through maxPending), and (2) the current CPU
#   load is less than the maximum allowable CPU load (configurable
#   through highLoad; skipped when noLoad is set).  This method is an
#   "after" callback scheduled by sched_launch.
#   On success the handler is appended to handlers_, the launch is
#   announced on the hm channel, launched_($srv_inst) is set, and 1 is
#   returned.  If link setup, script lookup or exec fails, the handler
#   is deleted, the timer is cancelled and 0 is returned.
# Arguments:
#   srv_name -- Name of the service to launch; must be a key of
#               creators_.
#   srv_loc  -- Location of the service's executable (see get_script).
#   srv_inst -- Service instance id; indexes tid_ and launched_.
#   msg      -- Passed unchanged to the creator's create_handler.
#   mtype    -- Optional media type used to look up conf_(<mtype>,bw)
#               and conf_(<mtype>,ofmt) in link mode.
#               NOTE(review): the original body read $mtype without it
#               ever being defined, so link mode always raised an
#               error; confirm where the media type should come from.
#-----------------------------------------------------------------------
HMAgent private launch { srv_name srv_loc srv_inst msg {mtype ""} } {
    # handlers_ must be the instance variable: unregister and shed_load
    # read it, and a plain local lappend would be lost on return.
    $self instvar creators_ handlers_ al_

    set npending [$self pending_launches]
    if { $npending >= [$self get_option maxPending] } {
        $self log "BACKLOG $npending"
        $self cancel_timer $srv_inst
        return
    }
    set load [HMAgent get_load]
    set hiload [$self get_option highLoad]
    $self log "LAUNCH load=$load $hiload"
    if { [$self get_option noLoad] == "" && $load >= $hiload } {
        $self cancel_timer $srv_inst
        return
    }

    if { ![info exists creators_($srv_name)] } {
        $self log "ERROR unrecorgnize service $srv_name"
        # Drop the timer entry like every other failure path does, so
        # pending_timer does not keep reporting this instance.
        $self cancel_timer $srv_inst
        return
    }
    set h [$creators_($srv_name) create_handler $srv_inst $msg]

    $h set gwctrl_ [$self get_option megaCtrl]
    if { [$self get_option link] == "yes" } {
        # Any error here (e.g. a conf_ entry missing from the config
        # file) must not leak the freshly created handler.
        if { [catch {$self launch_link_setup $h $mtype} err] } {
            $self log "ERROR link setup for $srv_inst failed: $err"
            delete $h
            $self cancel_timer $srv_inst
            return 0
        }
    }

    set script [$self get_script $srv_name $srv_loc]
    if { $script == "" || [$h exec $script] < 0 } {
        delete $h
        $self cancel_timer $srv_inst
        return 0
    }
    lappend handlers_ $h

    $self log "announce_launch $srv_inst"
    $al_(hm) announce_launch $srv_inst
    $self set launched_($srv_inst) 1
    # after 10000 "$self cancel_timer $srv_inst"
    return 1
}

# Helper for launch: copy the link-mode settings from conf_ onto
# handler $h and work out its client control address and port spec.
HMAgent instproc launch_link_setup { h mtype } {
    $self instvar conf_

    $h set link_ 1
    $h set gwctrl_ $conf_(gwctrl)
    if { $mtype != "" && [info exists conf_($mtype,bw)] } {
        $h set bw_ $conf_($mtype,bw)
        $h set ofmt_ $conf_($mtype,ofmt)
    } else {
        $self log "launch: no media configuration for \"$mtype\", bw_/ofmt_ not set"
    }

    # clientctrl has the form <addr>/<sport>:<rport>[/...]
    set parts [split $conf_(clientctrl) /]
    set addr [lindex $parts 0]
    if { [in_multicast $addr] } {
        $h set clientctrl_ $conf_(clientctrl)
        $h set rportspec_ 0
        return
    }

    # Unicast: allocate a base port; "*" as rport means "pick one".
    set baseport [$self uniqport]
    set ports [split [lindex $parts 1] :]
    set sport [lindex $ports 0]
    set rport [lindex $ports 1]
    if { $rport == "*" } {
        set rport [expr {$baseport + 2}]
    }
    $h set clientctrl_ $addr/$sport:$rport/1
    $h set rportspec_ $baseport:$rport
}
455 
456 # Call an ::http command once (the argument is a dummy) so the ::http namespace is autoloaded before get_script uses it.
457 ::http::formatQuery sdsds
458 
459 
#-----------------------------------------------------------------------
# Method:
#   HMAgent get_script
# Description:
#   Return a filename on local disk that corresponds to the executable
#   for the service specified by $srv_name and $srv_loc.  If necessary,
#   the script is retrieved from an HTTP server and stored as a
#   temporary file in scriptDir.  Successful lookups are cached in
#   array scriptfiles_, keyed by $srv_loc.  Returns "" when the script
#   cannot be found or downloaded.
# Arguments:
#   srv_name -- Name of the service (used for logging only).
#   srv_loc  -- Location of the service's executable.  Format is
#      static:<filename> http:<url> urn:<filename|url>.
#-----------------------------------------------------------------------
HMAgent instproc get_script { srv_name srv_loc } {
    $self instvar scriptfiles_ scripturls_ uniqid_

    # Check if we already have it
    if { [info exists scriptfiles_($srv_loc)] } {
        return $scriptfiles_($srv_loc)
    }
    $self log "get_script $srv_name $srv_loc"

    set o [split $srv_loc :]
    # Everything after the first ":" (re-joined so that a name that
    # itself contains ":" is not truncated).
    set rest [join [lrange $o 1 end] :]

    switch -- [lindex $o 0] {
    static {
        set n [$self get_option execPath]/$rest
        if { [file isfile $n] } {
            set scriptfiles_($srv_loc) $n
            return $n
        }
        return ""
    }
    http {
        set d [$self get_option scriptDir]
        set fname $d/as-$uniqid_.mash
        incr uniqid_

        # mkdir/open raise errors on failure; treat that as "no script"
        # so a urn: lookup can move on to its next candidate.
        if { [catch {
            if { ![file isdirectory $d] } {
                file mkdir $d
            }
            set fd [open $fname w+]
        } err] } {
            $self log "can't get script from $srv_loc."
            return ""
        }

        set code ""
        if { ![catch {::http::geturl $srv_loc -channel $fd} t] } {
            set code [lindex [::http::code $t] 1]
            # cleanup (not reset) releases the token's state array.
            ::http::cleanup $t
        }
        close $fd

        if { $code == "200" } {
            $self log "got script from $srv_loc"
            set scriptfiles_($srv_loc) $fname
            return $fname
        }
        # Do not leave an empty or partial download behind.
        catch {file delete -force $fname}
        $self log "can't get script from $srv_loc."
        return ""
    }
    urn {
        # try static first, then url.
        set s [$self get_script $srv_name static:$rest]
        if { $s != "" } {
            $self log "got $srv_name/$srv_loc from static:$rest: $s"
            set scriptfiles_($srv_loc) $s
            return $s
        }
        foreach url $scripturls_ {
            set s [$self get_script $srv_name $url/$rest]
            if { $s != "" } {
                $self log "got $srv_name/$srv_loc from $url: $s"
                set scriptfiles_($srv_loc) $s
                return $s
            }
        }
    }
    }
    return ""
}
537 
538 
#-----------------------------------------------------------------------
# Method:
#   HMAgent unregister
# Description:
#   Delete the first handler in handlers_ whose pid_ equals the pid
#   part of $aspec and remove it from the list.  Does nothing when
#   handlers_ does not exist or no handler matches.
# Arguments:
#   aspec -- "<pid>@<hostname>", this uniquely identifies a handler.
#   msg -- Unused.
#-----------------------------------------------------------------------
HMAgent instproc unregister { aspec msg } {
    $self instvar handlers_
    if { ![info exists handlers_] } {
        return
    }

    # FIXME
    set wanted [lindex [split $aspec @] 0]

    set count [llength $handlers_]
    for { set idx 0 } { $idx < $count } { incr idx } {
        set h [lindex $handlers_ $idx]
        if { [$h set pid_] == $wanted } {
            delete $h
            set handlers_ [lreplace $handlers_ $idx $idx]
            return
        }
    }
}
565 
566 
#-----------------------------------------------------------------------
# Method:
#   HMAgent pending_timer
# Description:
#   Return 1 if tid_ has an entry for $tid, otherwise 0.
# Arguments:
#   tid -- ID of the timer to check.
#-----------------------------------------------------------------------
HMAgent instproc pending_timer tid {
    $self instvar tid_
    if { [info exists tid_($tid)] } {
        return 1
    }
    return 0
}
579 
580 
#-----------------------------------------------------------------------
# Method:
#   HMAgent pending_launches
# Description:
#   Return the number of entries in launched_ (0 if it does not exist).
#-----------------------------------------------------------------------
HMAgent instproc pending_launches {} {
    $self instvar launched_
    return [array size launched_]
}
591 
592 
#-----------------------------------------------------------------------
# Method:
#   HMAgent cancel_timer
# Description:
#   Forget a scheduled launch: drop launched_($tid) if present, and if
#   a timer is recorded in tid_($tid), cancel its "after" callback and
#   drop that entry too.
# Arguments:
#   tid -- ID of the timer to cancel.
#-----------------------------------------------------------------------
HMAgent instproc cancel_timer {tid} {
    $self instvar tid_ launched_

    # unset fails when the element is missing; that case is ignored.
    catch { unset launched_($tid) }

    if { ![info exists tid_($tid)] } {
        return
    }
    $self log "cancelled timer $tid"
    after cancel $tid_($tid)
    unset tid_($tid)
}
613 
614 
#-----------------------------------------------------------------------
# Method:
#   HMAgent sched_launch
# Description:
#   Schedule a launch by creating an "after" callback.  The delay is
#   chosen uniformly at random from [0, T], where T = 2000*N capped at
#   the maxWait option, and N is the number of host managers reported
#   by the hm announce/listen manager.  The "after" id is stored in
#   tid_($srv_inst).
# Arguments:
#   srv_name -- Name of the service to launch.
#   srv_loc  -- Where to find the executable of the service.
#   srv_inst -- A unique ID to the service.
#   msg -- Data for the service.
#-----------------------------------------------------------------------
HMAgent public sched_launch { srv_name srv_loc srv_inst msg } {
    $self instvar al_ tid_

    set numhm [$al_(hm) hmnum]
    set T [expr {$numhm * 2000}]

    # Cap the window at maxWait.  (This must assign to T itself;
    # "set $T ..." would create a variable named after T's value and
    # leave T uncapped.)  An unset maxWait means "no cap".
    set max [$self get_option maxWait]
    if { $max != "" && $T > $max } {
        set T $max
    }
    # FIXME bias T by load.

    # set lambda [$self get_option lambda]
    # set r [HMAgent exp_timer $lambda $T]
    set r [HMAgent uniform_timer $T]
    $self log "timer $srv_name $srv_inst $r"

    # Build the callback with "list" so each argument stays a single
    # word even if it contains spaces, braces or brackets.
    set tid_($srv_inst) \
        [after $r [list $self launch $srv_name $srv_loc $srv_inst $msg]]
}
647 
648 #
649 # Exponential launch timer
650 # F(x) = 1/(exp(lambda) - 1) * (exp(lambda/T * x) - 1)
651 #
652 # => x = T/lambda log((exp(lambda) - 1) F(x) + 1)
653 #
654 # HMAgent proc exp_timer { lambda T } {
655 #    set r [expr [random]/double(0x7fffffff)]
656 #     set o [expr ($T/$lambda) * log((exp($lambda) - 1)*$r + 1)]
657 #     return [expr int($o+0.5)]
658 # }
659 
660 
#-----------------------------------------------------------------------
# Method:
#   HMAgent uniform_timer
# Description:
#   Class proc.  Scale a random draw to [0, T] and return it rounded to
#   the nearest integer.
# Arguments:
#   T -- upper bound for the random number.
#-----------------------------------------------------------------------
HMAgent proc uniform_timer { T } {
    set fraction [expr {[random] / double(0x7fffffff)}]
    set scaled [expr {$fraction * $T}]
    return [expr {int($scaled + 0.5)}]
}
674 
675 
#-----------------------------------------------------------------------
# Method:
#   HMAgent suppress_timer
# Description:
#   Cancel our own scheduled launch because someone else has already
#   launched the same service.  Nothing is cancelled when no timer is
#   recorded for $tid or when we have already launched it ourselves.
# Arguments:
#   tid -- ID to the service to cancel.
#-----------------------------------------------------------------------
HMAgent public suppress_timer { tid } {
    $self instvar tid_ launched_
    $self log "suppress timer $tid"

    if { ![info exists tid_($tid)] } {
        return
    }
    if { [info exists launched_($tid)] } {
        return
    }
    after cancel $tid_($tid)
    unset tid_($tid)
}
693 
694 
#-----------------------------------------------------------------------
# Method:
#   HMAgent open_cb
#   HMAgent close_cb
# Description:
#   Open and close a coordination bus.  close_cb decrements the count
#   kept in chanmap_ for the bus's channel and deletes the bus once the
#   count is no longer positive.
# Arguments:
#   cb -- Coordination bus to close.
#   handler -- Unused.
#-----------------------------------------------------------------------
HMAgent instproc close_cb cb {
    $self instvar chanmap_

    set chan [$cb set channel_]
    if { [incr chanmap_($chan) -1] > 0 } {
        # Still referenced.
        return
    }
    delete $cb
    unset chanmap_($chan)
}
715 
HMAgent instproc open_cb { handler } {
    # Creates a CoordinationBus on the current cbchannel_, records a
    # count of 1 for that channel in chanmap_, then advances cbchannel_
    # so the next bus gets a new channel.  $handler is not used.
    $self instvar cbchannel_ chanmap_

    set bus [new CoordinationBus -channel $cbchannel_]
    set chanmap_($cbchannel_) 1

    # FIXME
    incr cbchannel_ 1

    return $bus
}
726 
727 
#-----------------------------------------------------------------------
# Method:
#   HMAgent read_hmhosts
# Description:
#   Read the file "<execPath>/hmhosts", which lists one host per line
#   that hm can run on.  Every host whose address differs from
#   [localaddr] is stored in the list hmhosts_.  hmhosts_ is always
#   left defined (possibly empty), even when the file is missing or
#   unreadable.  Blank lines and hosts that cannot be resolved are
#   skipped.
#-----------------------------------------------------------------------
HMAgent instproc read_hmhosts {} {
    $self instvar hmhosts_

    # Start from a defined, empty list so later readers such as spawn
    # never hit an unset variable.
    set hmhosts_ {}

    set path [$self get_option execPath]
    set f "$path/hmhosts"

    # "$path/hmhosts" can never be the empty string, so test for the
    # file itself.
    if { ![file exists $f] } {
        $self log "hm: warning: no host file - disabling load_check"
        return
    }

    # "open" raises an error on failure rather than returning < 0.
    if { [catch {open $f r} fd] } {
        $self log  "hm: problems opening $f"
        return
    }

    set me [localaddr]
    # ">= 0" so that a blank line does not end the loop early.
    while { [gets $fd line] >= 0 } {
        set host [string trim $line]
        if { $host == "" } {
            continue
        }
        if { [catch {intoa [lookup_host_addr $host]} addr] } {
            $self log "hm: can't resolve host $host"
            continue
        }
        if { $addr != $me } {
            lappend hmhosts_ $host
        }
    }
    close $fd
}
759 
760 
#-----------------------------------------------------------------------
# Method:
#   HMAgent init_load_check
# Description:
#   Reset the low_/high_ sample counters and schedule the first call to
#   load_check after checkInterval.  load_check reschedules itself.
#-----------------------------------------------------------------------
HMAgent instproc init_load_check {} {
    # These must be the instance variables: load_check does
    # "incr low_" / "incr high_" on them, which fails on an undefined
    # variable.  Without instvar the sets below only create locals.
    $self instvar low_ high_

    set low_ 0
    set high_ 0

    set t [$self get_option checkInterval]
    after $t "$self load_check"
}
775 
776 
#-----------------------------------------------------------------------
# Method:
#   HMAgent load_check
# Description:
#   Checks the current load on this machine.
#   - Load above highLoad for at least loadSamples consecutive checks:
#     try to spawn another hm and, if spawn returns non-zero, shed
#     load by killing servents (see shed_load).
#   - Load below lowLoad for at least loadSamples consecutive checks,
#     and more than minHmNum hm's running (counting ourselves):
#     schedule our own death (see die) and stop checking.
#   - Otherwise both counters are reset.
#   Unless die was called, the check is rescheduled after
#   checkInterval.
#-----------------------------------------------------------------------
HMAgent instproc load_check {} {
    $self instvar low_ high_ al_

    # get_load is a class proc (HMAgent proc), so it is called on the
    # class, the same way launch does.
    set load [HMAgent get_load]
    set nsamples [$self get_option loadSamples]

    if { $load > [$self get_option highLoad] } {
        incr high_
        if { $high_ >= $nsamples } {
            if { [$self spawn] != 0 } {
                $self shed_load
            }
        }
    } elseif { $load < [$self get_option lowLoad] } {
        incr low_
        if { $low_ >= $nsamples } {
            # Don't die if the number of hm's is at the minimum.
            set minhm [$self get_option minHmNum]
            # count ourselves
            set hmnum [expr {[$al_(hm) hmnum] + 1}]
            if { $hmnum > $minhm } {
                $self die
                return
            }
        }
    } else {
        set low_ 0
        set high_ 0
    }

    set t [$self get_option checkInterval]
    after $t "$self load_check"
}
821 
822 #HMAgent instproc get_load {} {
823 #   set v [catch {open "|vmstat cpu"} fd]
824 #   if { $v != 0 } {
825 #       return 0
826 #   }
827 #   # Want last field of line #4
828 #   gets $fd
829 #   gets $fd
830 #   set l [gets $fd]
831 #   close $fd
832 #   set n [llength $l]
833 #   return [expr 100-[lindex $l [expr $n-1]]]
834 #}
835 
836 
#-----------------------------------------------------------------------
# Method:
#   HMAgent get_load
# Description:
#   Class proc.  Run "uptime" and return the third word from the end
#   of its first output line with commas trimmed -- the first of the
#   three load averages in the format shown below.  Returns 0 when
#   uptime cannot be run or its output cannot be parsed as a number.
#-----------------------------------------------------------------------
HMAgent proc get_load {} {
    if { [catch {open "|uptime"} fd] } {
        return 0
    }
    set l [gets $fd]
    # close on a command pipe raises an error if the child wrote to
    # stderr or exited non-zero; the line we already read is still good.
    catch { close $fd }

    # Format:
    # FIXMEX load averages: 0.00, 0.00, 0.00
    # The line is treated as a Tcl list; bail out if it is not one or
    # is too short to hold three averages.
    if { [catch {llength $l} n] || $n < 3 } {
        return 0
    }
    set avg [string trim [lindex $l [expr {$n - 3}]] ,]

    # Callers compare the result numerically against load thresholds.
    if { ![string is double -strict $avg] } {
        return 0
    }
    return $avg
}
856 
857 
#-----------------------------------------------------------------------
# Method:
#   HMAgent spawn
# Description:
#   Spawn a copy of hm on another host.  Candidates come from
#   "glustat -s l -l" when the glunix option is set, otherwise from
#   hmhosts_; the local address and every address already reported by
#   the hm announce/listen manager are removed.  With glunix the first
#   candidate is used (glustat output is already sorted by load),
#   otherwise one is picked at random.
#   Returns 1 if a spawn was attempted, 0 if there is no candidate.
#-----------------------------------------------------------------------
HMAgent instproc spawn {} {
    $self instvar al_ hmhosts_

    set use_glunix [expr {[$self get_option glunix] != ""}]

    # build hmhosts_ - hmlist
    set hmlist [$al_(hm) hmaddrs]

    if { $use_glunix } {
        set tlist [exec glustat -s l -l]
    } elseif { [info exists hmhosts_] } {
        set tlist $hmhosts_
    } else {
        # read_hmhosts was never run or found nothing.
        set tlist {}
    }

    # Remove the local entry and every host that already runs an hm.
    # lsearch returns -1 for "not found"; only remove real matches.
    foreach addr [concat [list [localaddr]] $hmlist] {
        set i [lsearch -exact $tlist $addr]
        if { $i >= 0 } {
            set tlist [lreplace $tlist $i $i]
        }
    }

    set n [llength $tlist]
    if { $n == 0 } {
        # no hosts to spawn to
        return 0
    }
    if { $use_glunix } {
        # glustat already sorted by load.
        set r 0
    } else {
        set r [expr {[random] % $n}]
    }

    $self dospawn [lindex $tlist $r]

    return 1
}
903 
904 
#-----------------------------------------------------------------------
# Method:
#   HMAgent dospawn
# Description:
#   Called by spawn.  Runs, in the background with output discarded,
#     <execCmd> <shost> <execPath>/smash <execPath>/hm <execArgs>
#   The command line is logged first; a failure to start it is logged
#   and otherwise ignored.
#-----------------------------------------------------------------------
HMAgent private dospawn { shost } {
    set path [$self get_option execPath]
    set argv [$self get_option execArgs]
    set cmd [$self get_option execCmd]

    # Built once so the logged text and the executed script are the same.
    set script "eval exec $cmd $shost $path/smash $path/hm $argv >& /dev/null &"

    $self log $script
    if { [catch $script t] != 0 } {
        $self log "catch error: $t"
    }
}
922 
923 
#-----------------------------------------------------------------------
# Method:
#   HMAgent shed_load
# Description:
#   Randomly kills off servents: each handler in handlers_ has its
#   process sent "kill -9" with probability 0.5.  Does nothing when no
#   handler has been registered yet.
#-----------------------------------------------------------------------
HMAgent instproc shed_load {} {
    $self instvar handlers_

    # handlers_ only exists once a launch has succeeded.
    if { ![info exists handlers_] } {
        return
    }

    foreach h $handlers_ {
        set pid [$h set pid_]
        # Shed 50% of the load randomly.
        set r [expr {[random] / double(0x7fffffff)}]
        if { $r < 0.5 } {
            $self log "exec kill -9 $pid"
            # Best effort: the process may already be gone.
            catch { exec kill -9 $pid }
        }
    }
}
942 
943 
#-----------------------------------------------------------------------
# Method:
#   HMAgent die
# Description:
#   Schedule really_die after a random delay of
#   minDeathWait + ([random] mod deathInterval) and remember the
#   "after" id in dying_ so recv_death can cancel it.
#-----------------------------------------------------------------------
HMAgent instproc die {} {
    $self instvar dying_

    set span [$self get_option deathInterval]
    set floor [$self get_option minDeathWait]
    set delay [expr {([random] % $span) + $floor}]

    set dying_ [after $delay "$self really_die"]
}
958 
959 
#-----------------------------------------------------------------------
# Method:
#   HMAgent really_die
# Description:
#   Callback scheduled by die.  Announces our death on the hm channel,
#   then makes one last check: if more than minHmNum hm's are running
#   (counting ourselves) and the load is still below lowLoad, exit via
#   doexit.  Otherwise clear the dying state, reset the load counters
#   and resume load checking.
#   Note that the announcement is sent before the final check, and
#   doexit announces again.
#-----------------------------------------------------------------------
HMAgent instproc really_die {} {
    $self instvar al_ dying_ low_ high_

    $al_(hm) announce_death

    set minhm [$self get_option minHmNum]
    # count ourselves
    set hmnum [expr {[$al_(hm) hmnum] + 1}]

    # one last check
    # get_load is a class proc, so call it on the class as launch does.
    if { $hmnum > $minhm && \
         [HMAgent get_load] < [$self get_option lowLoad] } {
        $self log "exit 0"
        $self doexit
    }

    # get back in the mix!
    # dying_ is normally set by die; tolerate it being absent.
    catch { unset dying_ }
    set low_ 0
    set high_ 0
    $self load_check
}
986 
987 
#-----------------------------------------------------------------------
# Method:
#   HMAgent recv_death
# Description:
#   Someone else has killed itself, so we may not have to.  If our own
#   death is pending (dying_ is set), cancel it, reset the load
#   counters and go back to load checking.  Otherwise do nothing.
#-----------------------------------------------------------------------
HMAgent instproc recv_death {} {
    $self instvar dying_ low_ high_

    if { [info exists dying_] } {
        # Someone else is dying, we get a reprieve...
        after cancel $dying_
        unset dying_
        set low_ 0
        set high_ 0
        $self load_check
    }
}
1009 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.