1 # cache.tcl --
2 #
3 # FIXME: This file needs a description here.
4 #
5 # Copyright (c) 1998-2002 The Regents of the University of California.
6 # All rights reserved.
7 #
8 # Redistribution and use in source and binary forms, with or without
9 # modification, are permitted provided that the following conditions are met:
10 #
11 # A. Redistributions of source code must retain the above copyright notice,
12 # this list of conditions and the following disclaimer.
13 # B. Redistributions in binary form must reproduce the above copyright notice,
14 # this list of conditions and the following disclaimer in the documentation
15 # and/or other materials provided with the distribution.
16 # C. Neither the names of the copyright holders nor the names of its
17 # contributors may be used to endorse or promote products derived from this
18 # software without specific prior written permission.
19 #
20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
24 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 import WebCacheControl WC_Recorder WC_Player
32
33 #
34 # A web cache object. It takes care of storing web contents
35 # on local disk index by the associated urls. It is also
36 # responsible for fetching the contents directly from the
37 # origin server if the data is not in the local cache.
38 #
39 Class WebCacheApplication -superclass Application -configuration {
40 cacheDir ~/.mash/srmcache/
41 cacheSize 1000000
42 playFilename none
43 recordFilename none
44 d1 1.0
45 d2 2.0
46 e1 2.0
47 e2 3.0
48 }
49
50 #
51 # The web cache constructor.
52 #
53 WebCacheApplication public init { argv } {
54 $self next cache
55
56 set netspec [$self init_argv $argv]
57 if { $netspec == "" } {
58 $self usage
59 $self fatal "unknown/invalid address"
60 }
61
62 # srmv2 related variables
63 $self instvar session_ source_
64 set ab [new AddressBlock $netspec]
65 set addr [$ab addr]
66 set port [$ab sport]
67 set ttl [$ab ttl]
68 delete $ab
69
70 set session_ [srm_create_session $addr $port $port $ttl]
71 set source_ [srm_create_source $session_]
72 srm_callbacks $session_ srm_recv srm_should_recover srm_read_adu \
73 srm_source_update srm_recv_cid
74
75 # cache control object
76 $self instvar control_ d1_ d2_ e1_ e2_
77 set e1_ [$self get_option e1]
78 set e2_ [$self get_option e2]
79 set d1_ [$self get_option d1]
80 set d2_ [$self get_option d2]
81 set control_ [new WebCacheControl $d1_ $d2_ $e1_ $e2_ \
82 $addr/[expr $port-1]/$ttl]
83 $control_ set cache_ $self
84
85 # cache related variables
86 $self instvar cache_dir_ index_ index_filename_ used_ total_ lru_
87 $self create_dir [$self get_option cacheDir]
88 set cache_dir_ [glob [$self get_option cacheDir]]
89
90 set index_filename_ [file join $cache_dir_ index.db]
91 if {! [catch {set f [open $index_filename_]}] } {
92 while 1 {
93 set line [gets $f]
94 if [eof $f] {
95 close $f
96 break
97 }
98 set index_([lindex $line 0]) [lindex $line 1]
99 }
100 }
101
102 # keep track of space limitations in cache
103 set lru_ {}
104 set used_ 0
105 set total_ [$self get_option cacheSize]
106
107 # check if we're recording and possibly set up record file.
108 if { [$self get_option record] != "" } {
109 $self instvar recorder_
110 set rf [$self get_option recordFilename]
111 if { $rf == "none" } {
112 set hn [info hostname]
113 set hn [lindex [split $hn .] 0]
114 set rf $hn.rlog
115 }
116 set recorder_ [new WC_Recorder $rf]
117 }
118
119 # check if we're playing and possibly set up a player.
120 if { [$self get_option play] != "" } {
121 $self instvar player_
122 set rf [$self get_option playFilename]
123 if { $rf == "none" } {
124 set hn [info hostname]
125 set hn [lindex [split $hn .] 0]
126 set rf $hn.rlog
127 }
128 set player_ [new WC_Player $rf $self 1]
129 # wait for 2 seconds for everything to settle down
130 after 2000 $player_ start
131 }
132 }
133
134 WebCacheApplication public usage { } {
135 set o [$self options]
136
137 puts "cache: \[-layer|-lazy|-base\] \[other-options-listed-below] address"
138 $o usage
139 }
140
141 WebCacheApplication private init_argv { argv } {
142 set o [$self options]
143
144 $o register_option -cacheDir cacheDir
145 $o register_option -cacheSize cacheSize
146 $o register_option -playFilename playFilename
147 $o register_option -recordFilename recordFilename
148
149 $o register_option -d1 d1
150 $o register_option -d2 d2
151 $o register_option -e1 e1
152 $o register_option -e2 e2
153
154 $o register_boolean_option -record record
155 $o register_boolean_option -play play
156 $o register_boolean_option -playRealtime realtime
157
158 return [$o parse_args $argv]
159 }
160
161
162 #
163 # Called by srm if another cache needs to recover the specified data.
164 #
165 WebCacheApplication public read_data { source cid seqno } {
166 $self instvar index_
167
168 set url [$self cid_2_name $source $cid]
169
170 if { $url == "" || ![info exists index_($url)] } {
171 return ""
172 } else {
173 return $index_($url)
174 }
175 }
176
177
178 #
179 # Called by the cache control after winning the response timer
180 # war. If the data is in the local cache, get it from there.
181 # Otherwise, fetch it from the origin server.
182 #
183 WebCacheApplication public send_data { url } {
184 $self instvar sockets_ source_ index_ cid_names_ proxy_
185
186 mtrace trcWC "cache: send_data $url"
187
188 if ![info exists index_($url)] {
189 # need to fetch data from origin server first
190 $self fetch $url
191 return
192 }
193
194 # allocate a cid for this url if one does not exist
195 set p [split [$self name_2_cid $url] ,]
196 set source [lindex $p 0]
197 set cid [lindex $p 1]
198 if { $cid == "" || $source != $source_} {
199 set cid [srm_calloc $source_ 0 $url]
200 set cid_names_($source_,$cid) $url
201 }
202
203 set f [open $index_($url)]
204 fconfigure $f -translation binary
205 set buffer ""
206 while { ![eof $f] } {
207 append buffer [read $f 4096]
208 }
209 close $f
210
211 # send the data to the session
212 srm_send $source_ $cid $buffer
213
214 ztrace "done [gettimeofday] $url fetch"
215
216 # callback to proxy to hand data to browser
217 if [info exists sockets_($url)] {
218 $proxy_ done_fetch $url $sockets_($url) $index_($url)
219 unset sockets_($url)
220 }
221 }
222
223
224 #
225 # Called when received data from the cache session. The
226 # data is stored in the filename <i>fn</i>.
227 #
228 WebCacheApplication public recv_data { source cid seqno fn } {
229 $self instvar proxy_ control_ sockets_ index_
230
231 # do a reverse mapping lookup to get url
232 set url [$self cid_2_name $source $cid]
233
234 ztrace "done [gettimeofday] $url srm"
235 mtrace trcWC "cache: recv_data $url"
236
237 # store the data onto local cache
238 $self put $url $fn $seqno
239
240 # tell cache control to cancel timers related for this url
241 $control_ cancel_all_timers $url
242
243 # callback to proxy to hand data to browser
244 if [info exists sockets_($url)] {
245 $proxy_ done_fetch $url $sockets_($url) $index_($url)
246 unset sockets_($url)
247 }
248 }
249
250
251 #
252 # Default behavior of cache is to recovery everything.
253 # Subclasses of this object should define their recovery strategies.
254 #
255 WebCacheApplication public should_recover { source cid sseq eseq } {
256 return 1
257 }
258
259
260 #
261 # Receive an URL to SRM mapping information. If this is a duplicate
262 # mapping for the same URL for different SRM names, we still remember
263 # this, but make sure we don't store the same data repeatedly.
264 #
265 WebCacheApplication public recv_cid { source cid parent_cid name } {
266 $self instvar cid_names_
267
268 mtrace trcWC "cache: recv_cid $name"
269 set cid_names_($source,$cid) $name
270 }
271
272
273 #
274 # Given a cid, this method returns the application level
275 # name (url) associated with it.
276 #
277 WebCacheApplication public cid_2_name { source cid } {
278 $self instvar cid_names_
279
280 if [info exists cid_names_($source,$cid)] {
281 set url $cid_names_($source,$cid)
282 } else {
283 set url [srm_get_container_name $source $cid]
284 }
285 return $url
286 }
287
288 #
289 # Give a name (url), this method returns the (source,cid) pair
290 # associated with it or an empty if it does not exist.
291 #
292 WebCacheApplication public name_2_cid { url } {
293 $self instvar source_ cid_names_
294
295 foreach p [array names cid_names_] {
296 if { "$cid_names_($p)" == "$url" } {
297 return [split $p ,]
298 }
299 }
300 return ""
301 }
302
303 #
304 # Returns a filename if the url is local in the cache.
305 # Otherwise, an empty string is returned.
306 #
307 WebCacheApplication public hit { url } {
308 $self instvar index_
309 if [info exists index_($url)] {
310 return $index_($url)
311 } else {
312 return ""
313 }
314 }
315
316 #
317 # Called to initiate the loop to access the cache. For example,
318 # from casting director in mashcast, or full page request from
319 # infocaster, or from the browser proxy.
320 #
321 WebCacheApplication public get { url { socket "" } } {
322 $self instvar index_ sockets_ proxy_ control_
323
324 ztrace "get [gettimeofday] $url"
325 mtrace trcWC "cache: get $url"
326
327 # record this url request into a file if we're recording
328 if { [$self get_option record] != "" } {
329 $self instvar recorder_
330 $recorder_ record $url
331 }
332
333 # remove url from stack and put on top
334 $self push_lru $url
335
336 if { $socket != "" } {
337 set sockets_($url) $socket
338 }
339
340 if [info exists index_($url)] {
341 # cache has data on local disk
342 ztrace "done [gettimeofday] $url disk"
343 mtrace trcWC "cache: get from disk"
344
345 # callback to proxy if it is requested from there
346 if [info exists sockets_($url)] {
347 $proxy_ done_fetch $url $sockets_($url) $index_($url)
348 unset sockets_($url)
349 }
350 } else {
351 # cache needs to get data from others
352 mtrace trcWC "cache: get from others"
353
354 set m [$self name_2_cid $url]
355 if { $m != "" } {
356 # if a mapping exists already, this means the
357 # data is already in some other cache, so
358 # we do a srm repair request on this cid.
359 # note: we assume there is always only one adu
360 # in any container
361
362 srm_recover [lindex $m 0] [lindex $m 1] 0 0
363 } else {
364 # the data is not in local disk nor other caches
365 # so start a timer loop
366
367 $control_ create_get_timer $url
368 }
369 }
370 }
371
372 #
373 # Put the contents of url into the cache. The data is stored
374 # in the filename <i>fn</i>. Even though the data might not
375 # have originated from local clients, we still count it as
376 # the most recently accessed in the LRU algorithm.
377 #
378 WebCacheApplication public put { url fn { seqno 0 } } {
379 $self instvar index_ cache_dir_ index_filename_ sockets_ \
380 used_ total_ control_
381
382 mtrace trcWC "cache: put $url $fn"
383
384 # do lru business
385 $self push_lru $url
386
387 # if the data is already on disk, don't need to store a
388 # duplicate. this situation can arise if we have to different
389 # srm names for the same url.
390 if [info exists index_($url)] {
391 return
392 }
393
394 # check whether there is enough space in cache
395 # if not need to make room
396
397 set fs [file size $fn]
398
399 if { [expr $fs + $used_] > $total_ } {
400 # need to make room in cache
401 $self make_room [expr $fs - ($total_ - $used_)]
402 }
403
404 set update_index_file 1
405
406 set name cache[clock clicks]
407 set index_($url) [file join $cache_dir_ $name[file extension $url]]
408
409 file rename $fn $index_($url)
410
411 # write the index file
412 if [catch {set f [open $index_filename_ a]}] {
413 set f [open $index_filename_ w 0644]
414 }
415
416 puts $f [list $url $index_($url)]
417 close $f
418
419 # tell cache control to cancel timers related for this url
420 #$control_ cancel_all_timers $url
421 }
422
423
424 #
425 # Adjust LRU stack
426 #
427 WebCacheApplication private push_lru { url } {
428 $self instvar lru_
429
430 # this url is the most recently used so move it to the
431 # top of the lru stack
432 set idx [lsearch $lru_ $url]
433 if { $idx != -1 } {
434 set lru_ [lreplace $lru_ $idx $idx]
435 }
436 lappend lru_ $url
437 }
438
439
440 #
441 # Fetch the contents of <i>url</i> from the origin server.
442 # This is non-blocking and calls done_fetch when the
443 # transactionis finished.
444 #
445 WebCacheApplication private fetch { url } {
446 mtrace trcWC "cache: fetching... $url"
447 set token [::http::geturl $url -command "$self done_fetch"]
448 }
449
450
451 #
452 # Called when start_fetch is finished. This puts the contents
453 # fetched into the cache by passing the data to the put method.
454 # Do a callback to cache control to notify the data is here.
455 #
456 WebCacheApplication private done_fetch { token } {
457 $self instvar proxy_ control_ sockets_ index_ cache_dir_
458
459 upvar #0 $token state
460 set url $state(url)
461
462 mtrace trcWC "cache: done_fetch $url"
463
464 # if we received the data via the srm session while we
465 # were fetching the data from the origin server, we can
466 # just return because recv_data would have put the data in
467 # our cache and passed the data to the browser, if necessary.
468 if [info exists index_($url)] {
469 return
470 }
471
472 set fn [file join $cache_dir_ tmp[clock clicks]]
473 set f [open $fn w 0644]
474 fconfigure $f -translation binary
475 puts -nonewline $f "$state(http)\r\n"
476 foreach {name value} $state(meta) {
477 puts -nonewline $f "$name: $value\r\n"
478 }
479 puts -nonewline $f "\r\n"
480 puts -nonewline $f $state(body)
481 close $f
482
483 # tell cache control to cancel timers related for this url
484 $control_ cancel_all_timers $url
485
486 # put the data onto disk
487 $self put $url $fn
488
489 # send the data to session
490 $self send_data $url
491 }
492
493 #
494 # Uses LRU algorithm to determine which object to evict from the
495 # cache. Subclasses should define their own strategies. <i>needed</i>
496 # denotes the amount of space needed in B.
497 #
498 WebCacheApplication public make_room { needed } {
499 $self instvar index_ lru_ used_
500
501 set cleared 0
502
503 while { $cleared < needed } {
504
505 # find the lru object in cache
506 set url [lindex $lru_ 0]
507 set fn $index_($url)
508 set fs [file size $fn]
509
510 set cleared [expr $cleared + $fs]
511
512 $self flush_url $url
513 }
514
515 }
516
517 WebCacheApplication private flush_url { url } {
518 $self instvar index_ lru_ used_
519
520 # remove the file and clean up cache index
521 file delete $fn
522 unset index_($url)
523 set lru_ [lrange $lru_ 1 end]
524
525 set used_ [expr $used_ - $fs]
526 }
527
528 WebCacheApplication public flush_all { } {
529 $self instvar index_ cache_dir_
530 file delete -force -- [glob -nocomplain [file join $cache_dir_ *]]
531 catch {unset index_}
532 }
533
534 WebCacheApplication private create_dir { path } {
535 if { ![file isdirectory $path] } {
536 set dir ""
537 foreach split [file split $path] {
538 set dir [file join $dir $split]
539 if { ![file exists $dir] } {
540 # this command will cause an error
541 # if it is not possible to create the dir
542 file mkdir $dir
543 }
544 }
545 }
546 }
547
548 #
549 # SRMv2 tcl callbacks
550 #
551
552 # FIXME
553 set fn {/tmp/recv.dat}
554
555 proc srm_recv { src cid seqno data } {
556 global cache fn
557
558 set f [open $fn w 0644]
559 fconfigure $f -translation binary
560 puts -nonewline $f $data
561 close $f
562
563 $cache recv_data $src $cid $seqno $fn
564 }
565
566 proc srm_read_adu { src cid seqno } {
567 global cache
568
569 set fn [$cache read_data $src $cid $seqno]
570
571 if { $fn == "" } {
572 return ""
573 } else {
574 set f [open $fn]
575 fconfigure $f -translation binary
576 set buffer ""
577 while { ![eof $f] } {
578 append buffer [read $f 4096]
579 }
580 close $f
581
582 return $buffer
583 }
584 }
585
586 proc srm_should_recover { src cid sseq eseq } {
587 global cache
588 $cache should_recover $src $cid $sseq $eseq
589 }
590
591
592 proc srm_recv_cid { src cid parent_cid name } {
593 global cache
594 $cache recv_cid $src $cid $parent_cid $name
595 }
596
597 proc srm_source_update { src info } {
598 # do nothing now
599 }
600
601
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.