1 # status_checker.tcl --
2 #
3 # Watchdog that pings the mash_server over HTTP, restarts it when it stops responding, and emails status reports.
4 #
5 # Copyright (c) 1997-2002 The Regents of the University of California.
6 # All rights reserved.
7 #
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are met:
10 #
11 # A. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 # B. Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
16 # C. Neither the names of the copyright holders nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
24 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #
31 # @(#) $Header: /usr/mash/src/repository/mash/mash-1/tcl/applications/pathfinder/status_checker.tcl,v 1.4 2002/02/03 04:22:06 lim Exp $
32
33
34 #
35 # State 1: (system successfully running)
36 # while (successful ping) { }
37 # send email "mash_server crashed, trying to restart"
38 # goto state 0
39 #
40 # State 0: (system not responding)
41 # kill mash_server
42 # restart mash_server
43 # if (successful ping before timeout) {
44 # goto state 1
45 # } else {
46 # send email "restart failed, will keep trying"
47 # goto state -1
48 # }
49 #
50 # State -1: (restart failed)
51 # while ( !(successful ping) ) { }
52 # send email "mash_server back up"
53 # goto state 1
54 #
55
56 import MTrace
57 import Configuration
58
59 Class Status_Checker
60
#
# Constructor.  Loads the "mserver" preferences, resolves the sendmail and
# mash_server commands, records the output directory and ping frequency,
# and then enters the monitoring loop.  Because event_loop loops forever,
# this method does not return.
#
# The process exits if neither the preferences nor "which" can supply the
# sendmail or mash_server command.
#
Status_Checker public init { } {

    $self instvar email_addr_ server_addr_ server_port_ \
            sendmail_command_ server_command_ output_dir_ \
            ping_freq_

    # Load the mserver preferences.
    set o [$self options]
    $o load_preferences "mserver"
    set server_addr_ [$self get_option server_addr]
    set server_port_ [$self get_option server_port]

    # Initialize the email address to which error messages will be sent.
    set email_addr_ [$self get_option email_addr]

    # Locate the sendmail binary using either the options file or the
    # "which" command and create the command to be used by the
    # send_msg method.
    set sendmail_command_ [$self get_option sendmail_command]
    if { $sendmail_command_ == "" } {
        if [catch { set sendmail_command_ [exec which sendmail] }] {
            puts "sendmail command not found; please specify path in\
                    prefs-mserver file using sendmail_command."
            exit
        }
    }
    # -t makes sendmail take the recipients from the message headers.
    append sendmail_command_ " -t"

    # Determine the path for the mash_server from either the options
    # file or by locating it using "which."
    set server_command_ [$self get_option server_command]
    if { $server_command_ == "" } {
        if [catch { set server_command_ [exec which mash_server] }] {
            puts "mash_server command not found; please specify path\
                    in prefs-mserver file using server_command."
            exit
        }
    }

    # Find the server's output directory from the options file.
    set output_dir_ [$self get_option output_dir]

    # Initialize the ping frequency from the options file.  If ping
    # frequency equals x, then there are x seconds between pings.
    # Fall back to 30 seconds when the option is not set; previously the
    # default was unconditionally overwritten by the (possibly empty)
    # option value, which later made "exec sleep" fail.
    set ping_freq_ [$self get_option ping_freq]
    if { $ping_freq_ == "" } {
        set ping_freq_ 30
    }

    $self event_loop
}
110
111
#
# Runs the monitoring state machine forever.
#
#   alive (1):           ping every ping_freq_ seconds until a ping fails,
#                        then move to "not responding".
#   not responding (0):  kill and restart the server, mail a crash notice,
#                        wait ping_freq_ seconds and ping once.  On success
#                        mail "back up" and return to "alive".  The first
#                        failure is retried once; the second failure moves
#                        to "restart failed".
#   restart failed (-1): ping every ping_freq_ seconds until one succeeds,
#                        mail "back up" and return to "alive".
#
Status_Checker private event_loop { } {

    $self instvar ping_freq_

    mtrace trcNet "In Status_Checker::event_loop"

    # State codes and the starting state.
    set alive 1
    set not_responding 0
    set restart_failed -1
    set state $alive

    # Set once the first restart attempt of a crash has already failed.
    set second_try 0

    while { 1 } {

        if { $state == $alive } {
            mtrace trcNet "-> In 'alive' state"
            while { 1 } {
                if { ![$self ping] } {
                    break
                }
                exec sleep $ping_freq_
            }
            mtrace trcNet "-> Exiting 'alive' state"
            set state $not_responding
            continue
        }

        if { $state == $restart_failed } {
            mtrace trcNet "-> In 'restart failed' state"
            while { 1 } {
                if { [$self ping] } {
                    break
                }
                exec sleep $ping_freq_
            }
            $self send_msg "mash_server back up"
            set state $alive
            continue
        }

        if { $state == $not_responding } {
            mtrace trcNet "-> In 'not responding' state"
            $self kill_server
            # restart_server returns the name of the archived output
            # file of the previous run (empty if there was none).
            set output_file [$self restart_server]
            $self send_msg "mash_server crashed; trying to restart\ndebug output file: $output_file"

            # Give the new server one ping period to come up.
            exec sleep $ping_freq_

            if { [$self ping] } {
                set state $alive
                set second_try 0
                $self send_msg "mash_server back up"
                continue
            }

            if { $second_try == 0 } {
                # Stay in this state for exactly one more attempt.
                $self send_msg "restart failed, trying again"
                set second_try 1
                continue
            }

            $self send_msg "restart failed"
            set second_try 0
            set state $restart_failed
        }
    }
}
166
167
168 #
169 # This method returns true if the server responds to GET requests.
170 #
Status_Checker private ping { } {

    $self instvar server_addr_ server_port_

    # Open a socket connection to the server.
    if [catch { set sock [socket $server_addr_ $server_port_] }] {
        mtrace trcNet "-> Open socket failed."
        return 0
    }

    # Send a GET request to the mash_server and read the first line of
    # the reply.  Any I/O error (e.g. the peer resetting the connection)
    # is treated as a failed ping rather than being allowed to abort the
    # checker.
    set buffer ""
    set io_failed [catch {
        puts -nonewline $sock "GET / HTTP/1.0\r\n\r\n"
        flush $sock
        set buffer [gets $sock]
    }]

    # Always release the socket, even after an I/O error.
    catch { close $sock }

    if { $io_failed } {
        mtrace trcNet "-> Socket I/O failed."
        return 0
    }

    if { $buffer == "" } {
        mtrace trcNet "-> Nothing read from socket."
        return 0
    }

    # NOTE(review): gets still blocks indefinitely if the server accepts
    # the connection but never answers; a read timeout would be needed to
    # detect that case.
    return 1
}
196
197
#
# Mails msg to email_addr_ by piping a complete message (headers, blank
# line, body) into the sendmail command built in init.
#
# Arguments:
#   msg   body text of the email.
#
# Failures to start, write to, or close the sendmail pipeline are traced
# and otherwise ignored so that a mail problem cannot stop the checker.
#
Status_Checker private send_msg { msg } {

    $self instvar email_addr_ sendmail_command_

    mtrace trcNet "In Status_Checker::send_msg"

    # Create the email message to be sent.
    set from_text "From: mash_server checker\n"
    set to_text "To: $email_addr_\n"
    set date [clock format [clock seconds] -format {%a, %d %B %Y %H:%M (%Z)}]
    set date_text "Date: $date\n"
    set sub_text "Subject: \n"
    # A line holding only "." marks the end of the message for sendmail.
    set end_msg ".\n"

    set text ""
    append text $from_text $to_text $date_text $sub_text "\n$msg\n" $end_msg

    # Send the email message using the sendmail command.
    if [catch { set command_id [open "| $sendmail_command_" w] } err] {
        mtrace trcNet "-> Could not start sendmail: $err"
        return
    }

    # The message must begin with the "From:" header; a stray leading
    # character here previously corrupted that header.
    if [catch { puts $command_id $text } err] {
        mtrace trcNet "-> Writing to sendmail failed: $err"
    }

    # Closing a command pipeline raises an error when the command exits
    # abnormally or writes to stderr.
    if [catch { close $command_id } err] {
        mtrace trcNet "-> sendmail reported: $err"
    }

    mtrace trcNet "-> Exiting send_msg"
}
221
222
#
# Kills every process that "ps" lists with "bin/mash_server" in its line.
# Does nothing (apart from tracing) when no such process is found.
#
Status_Checker private kill_server { } {

    mtrace trcNet "In method Status_Checker::kill_server"

    # grep exits non-zero when nothing matches, which exec reports as an
    # error.
    if [catch { set process_lines [exec ps | grep bin/mash_server] }] {
        mtrace trcNet "-> Server not running."
        return
    }

    # Collect the pid from each matching line.  The pid is assumed to be
    # the first column of the ps output (as the original code assumed);
    # it may be preceded by blanks, so it is extracted with a regexp
    # instead of split.  Lines belonging to the grep process itself are
    # skipped.
    set pids {}
    foreach line [split $process_lines "\n"] {
        if { [string first "grep" $line] >= 0 } {
            continue
        }
        if { [regexp {^[ \t]*([0-9]+)} $line whole pid] } {
            lappend pids $pid
        }
    }

    if { [llength $pids] == 0 } {
        mtrace trcNet "-> Server not running."
        return
    }

    mtrace trcNet "-> Killing server process."
    foreach pid $pids {
        # The process may have exited since ps ran; do not let that
        # abort the checker.
        if [catch { exec kill $pid } err] {
            mtrace trcNet "-> kill $pid failed: $err"
        }
    }
}
236
237
#
# Starts a new mash_server in the background from output_dir_, sending
# its stdout and stderr to output.txt.  An existing output.txt is first
# renamed to output<seconds>.txt, where <seconds> is the current clock
# value.
#
# Returns the name of that archived file, or "" when there was no
# previous output.txt.
#
Status_Checker private restart_server { } {

    # server_addr_ is only needed by the remote (ssh) variant noted below.
    $self instvar server_addr_ server_command_ output_dir_

    mtrace trcNet "In method Status_Checker::restart_server"

    cd $output_dir_

    # Archive the output of the previous run, if any.
    set archived_name ""
    if { [file exists output.txt] } {
        set archived_name "output[clock seconds].txt"
        exec mv output.txt $archived_name
    }

    # For now the server is only restarted on the local machine.  The
    # remote variant would build "$server_command_ >& <output file> &"
    # and run it with: exec ssh -n $server_addr_ $command
    mtrace trcNet "-> Restarting the server."
    exec $server_command_ >& output.txt &

    return $archived_name
}
262
263
# Initialize tracing with the trcNet category, which every mtrace call in
# this file uses.
MTrace init { trcNet }

# Create the checker.  Its init method enters the monitoring loop, so this
# statement does not complete while the checker is running.
set checker [new Status_Checker]
266
267
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.