1 # agent-hm.tcl --
2 #
3 # This file implements a Host Manager Agent, which is responsible
4 # for launching servents and managing resources in the AS1
5 # framework.
6 #
7 # Copyright (c) 1997-2002 The Regents of the University of California.
8 # All rights reserved.
9 #
10 # Redistribution and use in source and binary forms, with or without
11 # modification, are permitted provided that the following conditions are met:
12 #
13 # A. Redistributions of source code must retain the above copyright notice,
14 # this list of conditions and the following disclaimer.
15 # B. Redistributions in binary form must reproduce the above copyright notice,
16 # this list of conditions and the following disclaimer in the documentation
17 # and/or other materials provided with the distribution.
18 # C. Neither the names of the copyright holders nor the names of its
19 # contributors may be used to endorse or promote products derived from this
20 # software without specific prior written permission.
21 #
22 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
23 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
26 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #
33 # @(#) $Header: /usr/mash/src/repository/mash/mash-1/tcl/hm/agent-hm.tcl,v 1.65 2002/02/26 20:25:23 weitsang Exp $ (UCB)
34
35
36 import Timer AnnounceListenManager/AS/HM
37 import ServiceCreator ServiceCreator/MeGa
38 import CoordinationBus MeGa AnnounceListenManager/AS/Platform
39
40 #-----------------------------------------------------------------------
41 # Class:
42 # HMAgent
43 #
44 # Description:
45 # Host Manager Agent, part of the AS1 framework
46 #
47 # Members:
# maxports_ --
#   Size of the port range handed out to services (option maxPorts);
#   uniqport{} uses it as the modulus when wrapping port numbers.
50 #
51 # minport_ --
52 # Minimum port number to assign to services.
53 #
54 # uniqport_ --
55 # Last unique port number assigned to a service.
56 #
57 # portmap_ --
58 # This array maintains used port number so that we can
59 # generate an unused/unique port number.
60 #
61 # uniqid_ --
62 # A unique id used to create temporary filename for downloaded
63 # servlets. Initialized to 0 and is incremented by 1 everytime
64 # a temp file is created.
65 #
66 # app_ --
67 # The HM Application.
68 #
69 # cbchannel_ --
70 # Channel ID for coordination bus.
71 #
72 # logfd_ --
73 # File descriptor for a log file. The log file is created
74 # by the application.
75 #
76 # creators_ --
77 # An array of ServiceCreator object for various services.
78 # FIXME: we should not hardcode available services in hm.
79 # It should be discovered dynamically.
80 #
81 # al_ --
82 # An array of announce/listen manager.
83 #
# scriptfiles_ --
#   An array that caches, per service location, the filename of a
#   previously retrieved script.
#
# scripturls_ --
#   Predefined URL prefixes for downloading scripts of type "urn:".
90 #
91 # tid_ --
92 # An array of timers indexed by service instance.
93 #
94 # launched_ --
95 # An array of services launched, indexed by service instance.
96 #
97 # hmhosts_ --
98 # A list of other hosts eligible to run hm.
99 #-----------------------------------------------------------------------
Class HMAgent -superclass Timer

#-----------------------------------------------------------------------
# Method:
#   HMAgent init
# Description:
#   Construct the host manager agent: create one ServiceCreator per
#   known service, record the application and log channel, read the
#   port options, optionally parse the MeGa configuration file, create
#   the announce/listen managers and start the mode selected by the
#   loadBalance / targetNum options.
# Arguments:
#   app   -- the HM application object (queried for megaConfFile).
#   logfd -- channel that log{} writes to.
#-----------------------------------------------------------------------
HMAgent instproc init { app logfd } {

    $self next

    $self instvar maxports_ minport_ uniqport_ app_ cbchannel_ logfd_
    $self instvar creators_ al_

    # FIXME: the set of available services is hardcoded here.
    set known_services {
        MeGa Generic Mars MediaPad Aries Device
        FXTemp FXForwardBackEnd FXForwardFrontEnd
    }
    foreach service $known_services {
        set creators_($service) [new ServiceCreator/$service $self]
    }

    set cbchannel_ 3
    set app_ $app
    set logfd_ $logfd
    $self set uniqid_ 0
    set maxports_ [$self get_option maxPorts]
    set minport_ [$self get_option minPort]
    # The first port handed out is derived from minport_ (see uniqport{}).
    set uniqport_ $minport_

    $self log "Start"

    $self init_scripturls

    set conffile [$app get_option megaConfFile]
    if { [file exists $conffile] } {
        $self log "Reading config file $conffile."
        $self parse_conffile $conffile
    }

    set megaspec [$self get_option megaCtrl]
    set bw [$self get_option megaCtrlBW]

    # FIXME (historical): a note here said this manager was disabled
    # because spawning via rsh did not return when one too many network
    # channels were open.  NOTE(review): the code below is live.
    set al_(generic) [new AnnounceListenManager/AS/HM $self $megaspec $bw]

    # FIXME Should be subclassed.  One manager per control channel; the
    # "hm" channel is created last, exactly as before.
    foreach m { audio video sdp mb hm } {
        set spec [MeGa ctrlchan $m $megaspec]
        set al_($m) [new AnnounceListenManager/AS/HM $self $spec $bw]
    }

    if { [$self get_option loadBalance] != "" } {
        # Load-balancing mode: periodic load checks drive spawn/die.
        $al_(hm) start
        $self read_hmhosts
        $self init_load_check
    } elseif { [$self get_option targetNum] != "" } {
        # Target-count mode: the Timer callback runs target_check{}.
        $al_(hm) start
        $self set trgtnum_ [$self get_option targetNum]
        if { [$self get_option glunix] == "" } {
            $self read_hmhosts
        }
        HMAgent instproc timeout {} { $self target_check }
        $self randomize yes
        set factor [$self get_option checkFactor]
        set interval [$self get_option checkInterval]
        $self msched [expr {$factor * $interval}]
    } else {
        # hm's always announce, even if there is only one.
        $al_(hm) start
    }

    # Added by Angie: announce our contact address on a standard global
    # channel.  Default is no.
    if { [$self get_option allow_distrib] == "yes" } {
        set gspec [$self get_option glob_chan]
        set al_(platform) \
            [new AnnounceListenManager/AS/Platform $self $gspec $bw $al_(hm)]
        $al_(platform) start
    }
}
184
185
#-----------------------------------------------------------------------
# Method:
#   HMAgent init_scripturls
# Description:
#   Set scripturls_ to the built-in list of URL prefixes that
#   get_script{} tries for "urn:" service locations.
#-----------------------------------------------------------------------
HMAgent instproc init_scripturls {} {
    $self set scripturls_ [list \
        http://www-mash.cs.berkeley.edu/dist/as/scripts \
        http://www.cs.berkeley.edu/~elan/as/scripts]
}
200
201
#-----------------------------------------------------------------------
# Method:
#   HMAgent parse_conffile
# Description:
#   Read the configuration file.  The configuration file specifies how
#   this hm is connected to the other hms (host managers can be
#   chained).  Recognized lines, stored into the conf_ array:
#     link  <gwctrl> <clientctrl>  -- also sets option "link" to yes
#     leaf  <gwctrl>
#     media <type> <bw> <ofmt>     -- stored as conf_(<type>,bw|ofmt)
#   Blank lines and lines with an unknown keyword are ignored.  If the
#   file cannot be opened the problem is logged and nothing is read.
# Arguments:
#   f -- filename of the configuration file.
#-----------------------------------------------------------------------
HMAgent instproc parse_conffile { f } {
    # open raises a Tcl error on failure (it never returns a negative
    # value), so the failure has to be caught.
    if { [catch {open $f r} fd] } {
        $self log "can't open config file $f: $fd"
        return
    }
    $self instvar conf_
    # gets returns -1 at end of file and 0 for an empty line; testing
    # ">= 0" keeps a blank line from ending the parse early.
    while { [gets $fd line] >= 0 } {
        set line [string trim $line]
        if { [string length $line] == 0 } {
            continue
        }
        # A line that is not a well-formed Tcl list cannot be indexed.
        if { [catch {lindex $line 0} kw] } {
            $self log "ignoring malformed config line: $line"
            continue
        }
        # "--" stops a keyword that starts with "-" from being taken
        # as a switch option.
        switch -exact -- $kw {
            link {
                $self add_option link yes
                set conf_(gwctrl) [lindex $line 1]
                set conf_(clientctrl) [lindex $line 2]
            }
            leaf {
                set conf_(gwctrl) [lindex $line 1]
            }
            media {
                set mtype [lindex $line 1]
                set conf_($mtype,bw) [lindex $line 2]
                set conf_($mtype,ofmt) [lindex $line 3]
            }
        }
    }
    close $fd
}
236
237
#-----------------------------------------------------------------------
# Method:
#   HMAgent target_check
# Description:
#   Compare the number of running hm's (peers reported by the "hm"
#   announce/listen manager plus this one) with trgtnum_.  When there
#   are too few, spawn another with probability min(1, N/n - 1); when
#   there are too many, exit with probability (n - N)/n.  Otherwise
#   (or when the dice say no) reschedule the check.
#-----------------------------------------------------------------------
HMAgent instproc target_check {} {
    $self instvar al_ trgtnum_

    # Peers known to the manager, plus ourselves.
    set total [expr {[$al_(hm) hmnum] + 1}]

    # One uniform sample per check.
    set roll [expr {[random] / double(0x7fffffff)}]

    if { $total < $trgtnum_ } {
        # The ratio may exceed 1; that simply means "always spawn".
        set chance [expr {double($trgtnum_ - $total) / $total}]
        if { $roll < $chance } {
            $self spawn
        }
    } elseif { $total > $trgtnum_ } {
        set chance [expr {double($total - $trgtnum_) / $total}]
        if { $roll < $chance } {
            $self doexit
            return
        }
    }

    # want the max here probably...
    $self msched [$self get_option checkInterval]
}
271
272
#-----------------------------------------------------------------------
# Method:
#   HMAgent doexit
# Description:
#   Announce our death on the "hm" channel, then terminate the process
#   with exit status 0.
#-----------------------------------------------------------------------
HMAgent instproc doexit {} {
    [$self set al_(hm)] announce_death
    exit 0
}
285
286
#-----------------------------------------------------------------------
# Method:
#   HMAgent log
# Description:
#   Write one line to logfd_ and flush it.  A non-empty message is
#   prefixed with the process id in brackets and elements 1..3 of
#   [gettimeofday ascii]; an empty message produces an empty line.
#   Nothing is written when option noLog is set.
# Arguments:
#   msg -- message to be printed.
#-----------------------------------------------------------------------
HMAgent instproc log {msg} {
    if { [$self get_option noLog] != "" } {
        return
    }
    $self instvar logfd_
    if { [string length $msg] == 0 } {
        set entry ""
    } else {
        set entry "\[[$self pid]\] [lrange [gettimeofday ascii] 1 3] $msg"
    }
    puts $logfd_ $entry
    flush $logfd_
}
308
309
#-----------------------------------------------------------------------
# Method:
#   HMAgent pid
# Description:
#   Return the process id of this process (Tcl's built-in pid).
#-----------------------------------------------------------------------
HMAgent instproc pid {} {
    # The last command's result is the method's result.
    pid
}
319
320
#-----------------------------------------------------------------------
# Method:
#   HMAgent destroy
# Description:
#   Delete every announce/listen manager stored in array al_, then
#   chain to the superclass destructor.  This includes the "generic"
#   and optional "platform" managers created by init{}, and tolerates
#   entries that were never created.
#-----------------------------------------------------------------------
HMAgent instproc destroy {} {
    $self instvar al_

    # The platform manager was constructed with a reference to the
    # "hm" manager, so release it before the others.
    if { [info exists al_(platform)] } {
        delete $al_(platform)
        unset al_(platform)
    }
    foreach m [array names al_] {
        delete $al_($m)
        unset al_($m)
    }

    $self next
}
335
336
#-----------------------------------------------------------------------
# Method:
#   HMAgent uniqport
# Description:
#   Return the next port number, advancing by 4 inside the range
#   [minport_, minport_ + maxports_) and wrapping at the end of it.
#   Ports recorded in portmap_ are skipped.
#   The wrap is computed on the offset from minport_.  Wrapping the
#   absolute port number (as was done before) only steps by 4 when
#   minport_ is a multiple of maxports_; otherwise the step becomes
#   (minport_ % maxports_) + 4, which can be 0 or overlap earlier ports.
#   FIXME: portmap_ is not updated! *BUG*  Nothing in this file records
#   or releases ports, so uniqueness only holds until the range wraps.
#-----------------------------------------------------------------------
HMAgent instproc uniqport {} {
    # Do this for now...
    $self instvar uniqport_ portmap_ maxports_ minport_

    set tries 0
    while { 1 } {
        # Increment so that each request has a new port number.
        set offset [expr {($uniqport_ - $minport_ + 4) % $maxports_}]
        set uniqport_ [expr {$minport_ + $offset}]
        if { ![info exists portmap_($uniqport_)] } {
            break
        }
        # Bound the search so a fully populated portmap_ cannot spin
        # forever; fall back to reusing the current candidate.
        if { [incr tries] >= $maxports_ } {
            $self log "uniqport: no free port found, reusing $uniqport_"
            break
        }
    }
    return $uniqport_
}
358
359
#-----------------------------------------------------------------------
# Method:
#   HMAgent pick_mcastaddr
# Description:
#   Return a random multicast address of the form 224.3.X.Y, where X
#   and Y are each drawn from 2..251.
#-----------------------------------------------------------------------
HMAgent public pick_mcastaddr {} {
    # FIXME
    set addr 224.3
    foreach octet { third fourth } {
        append addr . [expr {([random] % 250) + 2}]
    }
    return $addr
}
373
374
#-----------------------------------------------------------------------
# Method:
#   HMAgent launch
# Description:
#   Launch a service.  A service is launched only if (1) the number of
#   current pending launches is less than the maximum allowable pending
#   launches (option maxPending), and (2) unless option noLoad is set,
#   the current load is below option highLoad.  This method is the
#   "after" callback scheduled by sched_launch{}.
#   On success the handler is appended to the instance list handlers_
#   (so unregister{} and shed_load{} can find it), the launch is
#   announced on the "hm" channel and launched_(srv_inst) is set.
# Arguments:
#   srv_name -- Name of the service to launch (index into creators_).
#   srv_loc  -- Location of the service's executable (see get_script{}).
#   srv_inst -- Service instance id; used as the timer/launch key.
#   msg      -- Data handed to the creator's create_handler.
# Returns:
#   1 on success, 0 when the handler could not be configured or
#   started, and an empty result when the launch was skipped
#   (backlog, high load, unknown service).
#-----------------------------------------------------------------------
HMAgent private launch { srv_name srv_loc srv_inst msg } {
    # handlers_ must be the instance variable: appending to a local
    # would lose the handler as soon as this method returns.
    $self instvar creators_ handlers_ al_

    set pending [$self pending_launches]
    if { $pending >= [$self get_option maxPending] } {
        $self log "BACKLOG $pending"
        $self cancel_timer $srv_inst
        return
    }
    set load [HMAgent get_load]
    set hiload [$self get_option highLoad]
    $self log "LAUNCH load=$load $hiload"
    if { [$self get_option noLoad] == "" && $load >= $hiload } {
        $self cancel_timer $srv_inst
        return
    }

    if { ![info exists creators_($srv_name)] } {
        $self log "ERROR unrecorgnize service $srv_name"
        return
    }
    set h [$creators_($srv_name) create_handler $srv_inst $msg]

    $h set gwctrl_ [$self get_option megaCtrl]
    if { [$self get_option link] == "yes" } {
        $self instvar conf_
        $h set link_ 1
        $h set gwctrl_ $conf_(gwctrl)

        # NOTE(review): mtype is not defined anywhere in this method,
        # so these lookups raise "no such variable".  The media type
        # presumably has to come from the request -- confirm where.
        # Until then, fail this launch cleanly instead of raising out
        # of the "after" callback with the handler leaked.
        if { [catch {
            $h set bw_ $conf_($mtype,bw)
            $h set ofmt_ $conf_($mtype,ofmt)
        } err] } {
            $self log "ERROR launch $srv_inst: no link media config: $err"
            delete $h
            $self cancel_timer $srv_inst
            return 0
        }

        set a [split $conf_(clientctrl) /]
        if { [in_multicast [lindex $a 0]] } {
            $h set clientctrl_ $conf_(clientctrl)
            $h set rportspec_ 0
        } else {
            # Unicast client control: allocate a local base port and
            # fill in a "*" receive port from it.
            set baseport [$self uniqport]
            set addr [lindex $a 0]
            set ports [split [lindex $a 1] :]
            set sport [lindex $ports 0]
            set rport [lindex $ports 1]
            if { $rport == "*" } {
                set rport [expr {$baseport + 2}]
            }
            $h set clientctrl_ $addr/$sport:$rport/1
            $h set rportspec_ $baseport:$rport
        }
    }

    set script [$self get_script $srv_name $srv_loc]
    if { $script == "" || [$h exec $script] < 0 } {
        delete $h
        $self cancel_timer $srv_inst
        return 0
    }
    lappend handlers_ $h

    $self log "announce_launch $srv_inst"
    $al_(hm) announce_launch $srv_inst
    $self set launched_($srv_inst) 1
    # after 10000 "$self cancel_timer $srv_inst"
    return 1
}
455
# to autoload the ::http namespace
::http::formatQuery sdsds


#-----------------------------------------------------------------------
# Method:
#   HMAgent get_script
# Description:
#   Return a filename on local disk that corresponds to the executable
#   for the service specified by $srv_name and $srv_loc, or "" if it
#   cannot be obtained.  Scripts are retrieved from an HTTP server and
#   stored as as-<n>.mash under option scriptDir when necessary.
#   Previously retrieved scripts are cached in array scriptfiles_.
#   A failed download (unreachable server, non-200 status, unwritable
#   file) is logged, its partial file is removed, and "" is returned,
#   so that a "urn:" lookup can go on to the next URL prefix.
# Arguments:
#   srv_name -- Name of the service (used for logging).
#   srv_loc  -- Location of the service's executable.  Format is
#               static:<filename> http:<url> urn:<filename|url>.
#-----------------------------------------------------------------------
HMAgent instproc get_script { srv_name srv_loc } {
    $self instvar scriptfiles_

    # Check if we already have it
    if { [info exists scriptfiles_($srv_loc)] } {
        return $scriptfiles_($srv_loc)
    }
    $self log "get_script $srv_name $srv_loc"

    set o [split $srv_loc :]
    switch -exact -- [lindex $o 0] {
        static {
            # A file below option execPath.
            set n [$self get_option execPath]/[lindex $o 1]
            if { [file isfile $n] } {
                set scriptfiles_($srv_loc) $n
                return $n
            }
            return ""
        }
        http {
            set d [$self get_option scriptDir]
            if { ![file isdirectory $d] } {
                # A failure here shows up as an open error below.
                catch {file mkdir $d}
            }
            $self instvar uniqid_
            set fname $d/as-$uniqid_.mash
            incr uniqid_

            if { [catch {open $fname w+} fd] } {
                $self log "can't get script from $srv_loc."
                return ""
            }
            # geturl raises on connection problems; make sure the
            # channel is closed whatever happens.
            set failed [catch {::http::geturl $srv_loc -channel $fd} t]
            close $fd

            if { !$failed } {
                set code [lindex [::http::code $t] 1]
                ::http::reset $t
                # Release the transaction state where the http package
                # provides cleanup; older versions may not have it.
                catch {::http::cleanup $t}
                if { $code == "200" } {
                    $self log "got script from $srv_loc"
                    set scriptfiles_($srv_loc) $fname
                    return $fname
                }
            }
            $self log "can't get script from $srv_loc."
            # Do not leave an empty or error-page file behind.
            catch {file delete $fname}
            return ""
        }
        urn {
            set n [lindex $o 1]
            # try static first, then url.
            set s [$self get_script $srv_name static:$n]
            if { $s != "" } {
                $self log "got $srv_name/$srv_loc from static:$n: $s"
                set scriptfiles_($srv_loc) $s
                return $s
            }
            $self instvar scripturls_
            foreach url $scripturls_ {
                set s [$self get_script $srv_name $url/$n]
                if { $s != "" } {
                    $self log "got $srv_name/$srv_loc from $url: $s"
                    set scriptfiles_($srv_loc) $s
                    return $s
                }
            }
        }
    }
    return ""
}
537
538
#-----------------------------------------------------------------------
# Method:
#   HMAgent unregister
# Description:
#   Delete the first handler in handlers_ whose pid_ matches the pid
#   part of $aspec and drop it from the list.  Does nothing when no
#   handler list exists or no handler matches.
# Arguments:
#   aspec -- "<pid>@<hostname>"; only the <pid> part is used here.
#   msg   -- Unused.
#-----------------------------------------------------------------------
HMAgent instproc unregister { aspec msg } {
    $self instvar handlers_
    if { ![info exists handlers_] } {
        return
    }
    # FIXME
    set wanted [lindex [split $aspec @] 0]
    set count [llength $handlers_]
    for { set idx 0 } { $idx < $count } { incr idx } {
        set candidate [lindex $handlers_ $idx]
        if { [$candidate set pid_] == $wanted } {
            delete $candidate
            set handlers_ [lreplace $handlers_ $idx $idx]
            return
        }
    }
}
565
566
#-----------------------------------------------------------------------
# Method:
#   HMAgent pending_timer
# Description:
#   Return 1 if a launch timer is recorded in tid_ under $tid, else 0.
# Arguments:
#   tid -- ID of the timer to check.
#-----------------------------------------------------------------------
HMAgent instproc pending_timer { tid } {
    $self instvar tid_
    info exists tid_($tid)
}
579
580
#-----------------------------------------------------------------------
# Method:
#   HMAgent pending_launches
# Description:
#   Return the number of entries in the launched_ array (0 when the
#   array does not exist yet).
#-----------------------------------------------------------------------
HMAgent instproc pending_launches {} {
    $self instvar launched_
    array size launched_
}
591
592
#-----------------------------------------------------------------------
# Method:
#   HMAgent cancel_timer
# Description:
#   Forget a scheduled launch: drop its launched_ entry if present and,
#   if a timer is recorded in tid_, cancel the "after" callback and
#   remove the record.
# Arguments:
#   tid -- ID of the timer to cancel.
#-----------------------------------------------------------------------
HMAgent instproc cancel_timer {tid} {
    $self instvar tid_ launched_

    # Removing a missing entry is not an error here.
    catch { unset launched_($tid) }

    if { ![info exists tid_($tid)] } {
        return
    }
    $self log "cancelled timer $tid"
    after cancel $tid_($tid)
    unset tid_($tid)
}
613
614
#-----------------------------------------------------------------------
# Method:
#   HMAgent sched_launch
# Description:
#   Schedule a launch by creating an "after" callback for launch{}.
#   The delay is chosen uniformly from [0, T] milliseconds, where
#   T = 2000 * (hm count reported by the "hm" announce/listen manager),
#   clamped to option maxWait.  The "after" id is stored in
#   tid_(srv_inst).
# Arguments:
#   srv_name -- Name of the service to launch.
#   srv_loc  -- Where to find the executable of the service.
#   srv_inst -- A unique ID to the service.
#   msg      -- Data for the service.
#-----------------------------------------------------------------------
HMAgent public sched_launch { srv_name srv_loc srv_inst msg } {
    $self instvar al_ tid_

    set numhm [$al_(hm) hmnum]
    set T [expr {$numhm * 2000}]
    set max [$self get_option maxWait]
    if { $T > $max } {
        # Clamp T itself.  "set $T $max" would assign to a variable
        # named after T's value and leave T unclamped.
        set T $max
    }
    # FIXME bias T by load.

    # set lambda [$self get_option lambda]
    # set r [HMAgent exp_timer $lambda $T]
    set r [HMAgent uniform_timer $T]
    $self log "timer $srv_name $srv_inst $r"

    # Build the callback as a list so every argument reaches launch{}
    # verbatim, whatever characters it contains.
    set tid_($srv_inst) \
        [after $r [list $self launch $srv_name $srv_loc $srv_inst $msg]]
}
647
648 #
649 # Exponential launch timer
650 # F(x) = 1/(exp(lambda) - 1) * (exp(lambda/T * x) - 1)
651 #
652 # => x = T/lambda log((exp(lambda) - 1) F(x) + 1)
653 #
654 # HMAgent proc exp_timer { lambda T } {
655 # set r [expr [random]/double(0x7fffffff)]
656 # set o [expr ($T/$lambda) * log((exp($lambda) - 1)*$r + 1)]
657 # return [expr int($o+0.5)]
658 # }
659
660
#-----------------------------------------------------------------------
# Method:
#   HMAgent uniform_timer
# Description:
#   Scale one [random] sample (divided by 0x7fffffff) by T and return
#   the result rounded to the nearest integer, i.e. a value between 0
#   and T.
# Arguments:
#   T -- upper bound for the random number.
#-----------------------------------------------------------------------
HMAgent proc uniform_timer { T } {
    # Braced expressions are parsed once and keep full floating-point
    # precision; unbraced ones pass intermediate values through their
    # string form and re-substitute T as expression text.
    set fraction [expr {[random] / double(0x7fffffff)}]
    return [expr {int($fraction * $T + 0.5)}]
}
674
675
#-----------------------------------------------------------------------
# Method:
#   HMAgent suppress_timer
# Description:
#   Cancel our own scheduled launch because someone else has already
#   launched the same service.  A timer whose service we have already
#   launched ourselves (launched_ entry present) is left alone.
# Arguments:
#   tid -- ID of the service whose launch timer should be cancelled.
#-----------------------------------------------------------------------
HMAgent public suppress_timer { tid } {
    $self instvar tid_ launched_
    $self log "suppress timer $tid"

    if { ![info exists tid_($tid)] || [info exists launched_($tid)] } {
        return
    }
    after cancel $tid_($tid)
    unset tid_($tid)
}
693
694
#-----------------------------------------------------------------------
# Method:
#   HMAgent open_cb
#   HMAgent close_cb
# Description:
#   Open and close a coordination bus.  close_cb decrements the use
#   count kept in chanmap_ for the bus's channel and deletes the bus
#   (and the count) once it reaches zero.
# Arguments:
#   cb      -- Coordination bus to close.
#   handler -- Unused (open_cb).
#-----------------------------------------------------------------------
HMAgent instproc close_cb { cb } {
    $self instvar chanmap_
    set chan [$cb set channel_]

    # Still in use by someone else: keep the bus.
    if { [incr chanmap_($chan) -1] > 0 } {
        return
    }
    delete $cb
    unset chanmap_($chan)
}
715
# Create a CoordinationBus on the next free channel number
# (cbchannel_), record a use count of 1 for that channel in chanmap_,
# advance cbchannel_ and return the new bus.  $handler is not used.
HMAgent instproc open_cb { handler } {
    $self instvar cbchannel_ chanmap_

    set bus [new CoordinationBus -channel $cbchannel_]
    set chanmap_($cbchannel_) 1

    # FIXME: channel numbers are never reused.
    incr cbchannel_

    return $bus
}
726
727
#-----------------------------------------------------------------------
# Method:
#   HMAgent read_hmhosts
# Description:
#   Read the file "hmhosts" under option execPath, which contains one
#   host per line that hm can run on.  Every host whose address differs
#   from [localaddr] is appended to the list hmhosts_.  Blank lines are
#   skipped.  hmhosts_ always exists after this call (possibly empty),
#   even when the file is missing or unreadable.
#-----------------------------------------------------------------------
HMAgent instproc read_hmhosts {} {
    $self instvar hmhosts_

    # Later readers (spawn{}) expect the list to exist.
    if { ![info exists hmhosts_] } {
        set hmhosts_ {}
    }

    set path [$self get_option execPath]
    set f "$path/hmhosts"

    # The name is never empty (it always ends in /hmhosts), so test
    # for the file itself.
    if { ![file exists $f] } {
        $self log "hm: warning: no host file - disabling load_check"
        return
    }

    # open raises on failure instead of returning a negative value.
    if { [catch {open $f r} fd] } {
        $self log "hm: problems opening $f"
        return
    }

    set me [localaddr]
    # gets returns -1 at end of file; 0 only means an empty line.
    while { [gets $fd line] >= 0 } {
        set host [string trim $line]
        if { [string length $host] == 0 } {
            continue
        }
        if { [catch {intoa [lookup_host_addr $host]} addr] } {
            $self log "hm: can't resolve host $host"
            continue
        }
        if { [string compare $addr $me] != 0 } {
            lappend hmhosts_ $host
        }
    }
    close $fd
}
759
760
#-----------------------------------------------------------------------
# Method:
#   HMAgent init_load_check
# Description:
#   Reset the low_/high_ load sample counters and create a timer that
#   calls load_check{} after option checkInterval milliseconds
#   (load_check{} reschedules itself from then on).
#-----------------------------------------------------------------------
HMAgent instproc init_load_check {} {
    # The counters are instance state read by load_check{}; without
    # instvar they would be locals that vanish on return.
    $self instvar low_ high_

    set low_ 0
    set high_ 0

    set t [$self get_option checkInterval]
    after $t [list $self load_check]
}
775
776
#-----------------------------------------------------------------------
# Method:
#   HMAgent load_check
# Description:
#   Check the current load on this machine.  After option loadSamples
#   readings above option highLoad, try to spawn another hm and, if
#   that worked, shed some load by killing servents (see shed_load{}).
#   After loadSamples readings below option lowLoad, and provided more
#   than option minHmNum hm's are running, start dying (see die{}).
#   A reading between the two thresholds resets both counters.
#   Unless we started dying, the check is rescheduled after option
#   checkInterval.
#-----------------------------------------------------------------------
HMAgent instproc load_check {} {
    $self instvar low_ high_ al_

    # Tolerate being called before the counters were initialized.
    if { ![info exists low_] } {
        set low_ 0
    }
    if { ![info exists high_] } {
        set high_ 0
    }

    # get_load is a class proc, so it is invoked on the class (as
    # launch{} does), not on the instance.
    set load [HMAgent get_load]
    set nsamples [$self get_option loadSamples]
    if { $load > [$self get_option highLoad] } {
        incr high_
        if { $high_ >= $nsamples } {
            # Only shed load when another hm was started to absorb it.
            if { [$self spawn] != 0 } {
                $self shed_load
            }
        }
    } elseif { $load < [$self get_option lowLoad] } {
        incr low_
        if { $low_ >= $nsamples } {
            # Don't die if the number of hm's is at the minimum.
            set minhm [$self get_option minHmNum]
            set hmnum [$al_(hm) hmnum]
            # count ourselves
            incr hmnum
            if { $hmnum > $minhm } {
                # really_die{} resumes the checks if we survive.
                $self die
                return
            }
        }
    } else {
        set low_ 0
        set high_ 0
    }

    set t [$self get_option checkInterval]
    after $t [list $self load_check]
}
821
822 #HMAgent instproc get_load {} {
823 # set v [catch {open "|vmstat cpu"} fd]
824 # if { $v != 0 } {
825 # return 0
826 # }
827 # # Want last field of line #4
828 # gets $fd
829 # gets $fd
830 # set l [gets $fd]
831 # close $fd
832 # set n [llength $l]
833 # return [expr 100-[lindex $l [expr $n-1]]]
834 #}
835
836
#-----------------------------------------------------------------------
# Method:
#   HMAgent get_load
# Description:
#   Run uptime and return the third whitespace-separated field from
#   the end of its first output line, with commas trimmed.  For output
#   ending in "load averages: a, b, c" that is the first average.
#   Returns 0 when uptime cannot be run, its output has fewer than
#   three fields, or the field is not a number, so that callers always
#   compare against a numeric value.
#-----------------------------------------------------------------------
HMAgent proc get_load {} {
    if { [catch {open "|uptime"} fd] } {
        return 0
    }
    set status [catch {gets $fd} line]
    # Closing a command pipeline raises if the command failed or wrote
    # to stderr; that must not abort the caller.
    catch {close $fd}
    if { $status != 0 } {
        return 0
    }

    # Format:
    #   FIXMEX load averages: 0.00, 0.00, 0.00
    # Split on whitespace rather than treating the line as a Tcl list.
    set fields {}
    foreach word [split $line] {
        if { [string length $word] > 0 } {
            lappend fields $word
        }
    }
    set n [llength $fields]
    if { $n < 3 } {
        return 0
    }
    set avg [string trim [lindex $fields [expr {$n - 3}]] ,]

    # Reject anything that is not a number.
    if { [catch {expr {double($avg)}}] } {
        return 0
    }
    return $avg
}
856
857
#-----------------------------------------------------------------------
# Method:
#   HMAgent spawn
# Description:
#   Spawn a copy of hm on another host.  Candidates are the output of
#   "glustat -s l -l" when option glunix is set, otherwise hmhosts_
#   (an unset hmhosts_ counts as empty).  [localaddr] and every address
#   reported by the "hm" announce/listen manager are removed from the
#   candidates (first occurrence each).  With glunix the first
#   remaining candidate is used; otherwise one is picked at random.
#   NOTE(review): the removal is an exact string match; it assumes the
#   candidate list and hmaddrs use the same host notation -- confirm.
# Returns:
#   1 if a spawn was attempted, 0 if there was no host to spawn to.
#-----------------------------------------------------------------------
HMAgent instproc spawn {} {
    $self instvar al_ hmhosts_

    # build hmhosts_ - hmlist
    set hmlist [$al_(hm) hmaddrs]
    set use_glunix [expr {[$self get_option glunix] != ""}]

    if { $use_glunix } {
        set tlist [exec glustat -s l -l]
    } elseif { [info exists hmhosts_] } {
        set tlist $hmhosts_
    } else {
        # read_hmhosts{} found no usable host (or was never run).
        set tlist {}
    }

    # Remove any local entry, then every host already running an hm.
    foreach taken [linsert $hmlist 0 [localaddr]] {
        set i [lsearch -exact $tlist $taken]
        if { $i >= 0 } {
            set tlist [lreplace $tlist $i $i]
        }
    }

    set n [llength $tlist]
    if { $n == 0 } {
        # no hosts to spawn to
        return 0
    }
    if { $use_glunix } {
        # glustat already sorted by load.
        set r 0
    } else {
        set r [expr {[random] % $n}]
    }
    $self dospawn [lindex $tlist $r]

    return 1
}
903
904
#-----------------------------------------------------------------------
# Method:
#   HMAgent dospawn
# Description:
#   Called by spawn{}; start a copy of hm on $shost in the background
#   by running <execCmd> <shost> <execPath>/smash <execPath>/hm
#   <execArgs> with output discarded.  The command is logged first and
#   any error from starting it is logged as well.
#   NOTE(review): the command is a string that is evaluated twice
#   (by catch, then by eval), so special characters in the options or
#   host name are substituted -- kept as is to preserve behavior.
# Arguments:
#   shost -- host to start the new hm on.
#-----------------------------------------------------------------------
HMAgent private dospawn { shost } {
    $self instvar app_ al_
    set path [$self get_option execPath]
    set argv [$self get_option execArgs]
    set cmd [$self get_option execCmd]

    # Build the command line once; it is both logged and executed.
    set invocation \
        "eval exec $cmd $shost $path/smash $path/hm $argv >& /dev/null &"

    $self log $invocation
    if { [catch $invocation t] != 0 } {
        $self log "catch error: $t"
    }
}
922
923
#-----------------------------------------------------------------------
# Method:
#   HMAgent shed_load
# Description:
#   Randomly kill off servents: every handler in handlers_ is sent
#   "kill -9" with probability 0.5.  Does nothing when no handler has
#   been registered yet.  Killed handlers stay in handlers_ until
#   unregister{} removes them.
#-----------------------------------------------------------------------
HMAgent instproc shed_load {} {
    $self instvar handlers_

    # handlers_ only exists once a launch has succeeded.
    if { ![info exists handlers_] } {
        return
    }
    foreach h $handlers_ {
        set pid [$h set pid_]
        # Shed 50% of the load randomly.
        set r [expr {[random] / double(0x7fffffff)}]
        if { $r < 0.5 } {
            $self log "exec kill -9 $pid"
            # Best effort: the process may already be gone.
            catch {exec kill -9 $pid}
        }
    }
}
942
943
#-----------------------------------------------------------------------
# Method:
#   HMAgent die
# Description:
#   Schedule really_die{} after a random delay of
#   ([random] % deathInterval) + minDeathWait and remember the "after"
#   id in dying_ so that recv_death{} can cancel it.
#-----------------------------------------------------------------------
HMAgent instproc die {} {
    $self instvar dying_

    set span [$self get_option deathInterval]
    set floor [$self get_option minDeathWait]
    set delay [expr {([random] % $span) + $floor}]
    set dying_ [after $delay "$self really_die"]
}
958
959
#-----------------------------------------------------------------------
# Method:
#   HMAgent really_die
# Description:
#   Timer callback scheduled by die{}.  Announces our death, then makes
#   one last check: if more than option minHmNum hm's are running
#   (counting ourselves) and the load is still below option lowLoad,
#   exit via doexit{}.  Otherwise clear dying_, reset the load counters
#   and resume load_check{}.
#   NOTE(review): the death announcement is sent before the final
#   check, so it also goes out when we decide to stay alive -- confirm
#   that this is intended.
#-----------------------------------------------------------------------
HMAgent instproc really_die {} {
    $self instvar al_ dying_ low_ high_

    $al_(hm) announce_death
    set minhm [$self get_option minHmNum]
    set hmnum [$al_(hm) hmnum]
    # count ourselves
    incr hmnum
    # one last check; get_load is a class proc, so it is invoked on the
    # class (as launch{} does), not on the instance.
    if { $hmnum > $minhm && \
         [HMAgent get_load] < [$self get_option lowLoad] } {
        $self log "exit 0"
        $self doexit
    }
    # get back in the mix!
    unset dying_
    set low_ 0
    set high_ 0
    $self load_check
}
986
987
#-----------------------------------------------------------------------
# Method:
#   HMAgent recv_death
# Description:
#   Another hm has announced its death.  If we were about to die
#   ourselves (dying_ is set), cancel that, reset the load counters and
#   go back to checking the load.  Otherwise do nothing.
#-----------------------------------------------------------------------
HMAgent instproc recv_death {} {
    $self instvar dying_ low_ high_

    if { [info exists dying_] } {
        # Someone else is dying, we get a reprieve...
        after cancel $dying_
        unset dying_
        set low_ 0
        set high_ 0
        $self load_check
    }
}
1009
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.