~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Open Mash Cross Reference
mash/tcl/cache/cache.tcl

Component: ~ [ mash ] ~ [ apps ] ~ [ gsm ] ~ [ lib ] ~ [ otcl ] ~ [ srm ] ~ [ tcl8.3 ] ~ [ tclcl ] ~ [ tk8.3 ] ~ [ tutorials ] ~

  1 # cache.tcl --
  2 #
  3 #       FIXME: This file needs a description here.
  4 #
  5 # Copyright (c) 1998-2002 The Regents of the University of California.
  6 # All rights reserved.
  7 #
  8 # Redistribution and use in source and binary forms, with or without
  9 # modification, are permitted provided that the following conditions are met:
 10 #
 11 # A. Redistributions of source code must retain the above copyright notice,
 12 #    this list of conditions and the following disclaimer.
 13 # B. Redistributions in binary form must reproduce the above copyright notice,
 14 #    this list of conditions and the following disclaimer in the documentation
 15 #    and/or other materials provided with the distribution.
 16 # C. Neither the names of the copyright holders nor the names of its
 17 #    contributors may be used to endorse or promote products derived from this
 18 #    software without specific prior written permission.
 19 #
 20 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
 21 # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 22 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 23 # ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
 24 # ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 25 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 26 # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 27 # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 28 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30 
 31 import WebCacheControl WC_Recorder WC_Player
 32 
 33 #
 34 # A web cache object. It takes care of storing web contents
 35 # on local disk index by the associated urls. It is also
 36 # responsible for fetching the contents directly from the
 37 # origin server if the data is not in the local cache.
 38 #
 39 Class WebCacheApplication -superclass Application -configuration {
 40         cacheDir ~/.mash/srmcache/
 41         cacheSize 1000000
 42         playFilename none
 43         recordFilename none
 44         d1 1.0
 45         d2 2.0
 46         e1 2.0
 47         e2 3.0
 48 }
 49 
 50 #
 51 # The web cache constructor.
 52 #
 53 WebCacheApplication public init { argv } {
 54         $self next cache
 55 
 56         set netspec [$self init_argv $argv]
 57         if { $netspec == "" } {
 58                 $self usage
 59                 $self fatal "unknown/invalid address"
 60         }
 61 
 62         # srmv2 related variables
 63         $self instvar session_ source_
 64         set ab [new AddressBlock $netspec]
 65         set addr [$ab addr]
 66         set port [$ab sport]
 67         set ttl  [$ab ttl]
 68         delete $ab
 69 
 70         set session_ [srm_create_session $addr $port $port $ttl]
 71         set source_ [srm_create_source $session_]
 72         srm_callbacks $session_ srm_recv srm_should_recover srm_read_adu \
 73                 srm_source_update srm_recv_cid
 74 
 75         # cache control object
 76         $self instvar control_ d1_ d2_ e1_ e2_
 77         set e1_ [$self get_option e1]
 78         set e2_ [$self get_option e2]
 79         set d1_ [$self get_option d1]
 80         set d2_ [$self get_option d2]
 81         set control_ [new WebCacheControl $d1_ $d2_ $e1_ $e2_ \
 82                         $addr/[expr $port-1]/$ttl]
 83         $control_ set cache_ $self
 84 
 85         # cache related variables
 86         $self instvar cache_dir_ index_ index_filename_ used_ total_ lru_
 87         $self create_dir [$self get_option cacheDir]
 88         set cache_dir_ [glob [$self get_option cacheDir]]
 89 
 90         set index_filename_ [file join $cache_dir_ index.db]
 91         if {! [catch {set f [open $index_filename_]}] } {
 92                 while 1 {
 93                         set line [gets $f]
 94                         if [eof $f] {
 95                                 close $f
 96                                 break
 97                         }
 98                         set index_([lindex $line 0]) [lindex $line 1]
 99                 }
100         }
101 
102         # keep track of space limitations in cache
103         set lru_ {}
104         set used_ 0
105         set total_ [$self get_option cacheSize]
106 
107         # check if we're recording and possibly set up record file.
108         if { [$self get_option record] != "" } {
109                 $self instvar recorder_
110                 set rf [$self get_option recordFilename]
111                 if { $rf == "none" } {
112                         set hn [info hostname]
113                         set hn [lindex [split $hn .] 0]
114                         set rf $hn.rlog
115                 }
116                 set recorder_ [new WC_Recorder $rf]
117         }
118 
119         # check if we're playing and possibly set up a player.
120         if { [$self get_option play] != "" } {
121                 $self instvar player_
122                 set rf [$self get_option playFilename]
123                 if { $rf == "none" } {
124                         set hn [info hostname]
125                         set hn [lindex [split $hn .] 0]
126                         set rf $hn.rlog
127                 }
128                 set player_ [new WC_Player $rf $self 1]
129                 # wait for 2 seconds for everything to settle down
130                 after 2000 $player_ start
131         }
132 }
133 
134 WebCacheApplication public usage { } {
135         set o [$self options]
136 
137         puts  "cache: \[-layer|-lazy|-base\] \[other-options-listed-below] address"
138         $o usage
139 }
140 
141 WebCacheApplication private init_argv { argv } {
142         set o [$self options]
143 
144         $o register_option -cacheDir cacheDir
145         $o register_option -cacheSize cacheSize
146         $o register_option -playFilename playFilename
147         $o register_option -recordFilename recordFilename
148 
149         $o register_option -d1 d1
150         $o register_option -d2 d2
151         $o register_option -e1 e1
152         $o register_option -e2 e2
153 
154         $o register_boolean_option -record record
155         $o register_boolean_option -play play
156         $o register_boolean_option -playRealtime realtime
157 
158         return [$o parse_args $argv]
159 }
160 
161 
162 #
163 # Called by srm if another cache needs to recover the specified data.
164 #
165 WebCacheApplication public read_data { source cid seqno } {
166         $self instvar index_
167 
168         set url [$self cid_2_name $source $cid]
169 
170         if { $url == "" || ![info exists index_($url)] } {
171                 return ""
172         } else {
173                 return $index_($url)
174         }
175 }
176 
177 
178 #
179 # Called by the cache control after winning the response timer
180 # war. If the data is in the local cache, get it from there.
181 # Otherwise, fetch it from the origin server.
182 #
183 WebCacheApplication public send_data { url } {
184         $self instvar sockets_ source_ index_ cid_names_ proxy_
185 
186         mtrace trcWC  "cache: send_data $url"
187 
188         if ![info exists index_($url)] {
189                 # need to fetch data from origin server first
190                 $self fetch $url
191                 return
192         }
193 
194         # allocate a cid for this url if one does not exist
195         set p [split [$self name_2_cid $url] ,]
196         set source [lindex $p 0]
197         set cid [lindex $p 1]
198         if { $cid == "" || $source != $source_} {
199                 set cid [srm_calloc $source_ 0 $url]
200                 set cid_names_($source_,$cid) $url
201         }
202 
203         set f [open $index_($url)]
204         fconfigure $f -translation binary
205         set buffer ""
206         while { ![eof $f] } {
207                 append buffer [read $f 4096]
208         }
209         close $f
210 
211         # send the data to the session
212         srm_send $source_ $cid $buffer
213 
214         ztrace "done [gettimeofday] $url fetch"
215 
216         # callback to proxy to hand data to browser
217         if [info exists sockets_($url)] {
218                 $proxy_ done_fetch $url $sockets_($url) $index_($url)
219                 unset sockets_($url)
220         }
221 }
222 
223 
224 #
225 # Called when received data from the cache session. The
226 # data is stored in the filename <i>fn</i>.
227 #
228 WebCacheApplication public recv_data { source cid seqno fn } {
229         $self instvar proxy_ control_ sockets_ index_
230 
231         # do a reverse mapping lookup to get url
232         set url [$self cid_2_name $source $cid]
233 
234         ztrace "done [gettimeofday] $url srm"
235         mtrace trcWC  "cache: recv_data $url"
236 
237         # store the data onto local cache
238         $self put $url $fn $seqno
239 
240         # tell cache control to cancel timers related for this url
241         $control_ cancel_all_timers $url
242 
243         # callback to proxy to hand data to browser
244         if [info exists sockets_($url)] {
245                 $proxy_ done_fetch $url $sockets_($url) $index_($url)
246                 unset sockets_($url)
247         }
248 }
249 
250 
251 #
252 # Default behavior of cache is to recovery everything.
253 # Subclasses of this object should define their recovery strategies.
254 #
255 WebCacheApplication public should_recover { source cid sseq eseq } {
256         return 1
257 }
258 
259 
260 #
261 # Receive an URL to SRM mapping information. If this is a duplicate
262 # mapping for the same URL for different SRM names, we still remember
263 # this, but make sure we don't store the same data repeatedly.
264 #
265 WebCacheApplication public recv_cid { source cid parent_cid name } {
266         $self instvar cid_names_
267 
268         mtrace trcWC  "cache: recv_cid $name"
269         set cid_names_($source,$cid) $name
270 }
271 
272 
273 #
274 # Given a cid, this method returns the application level
275 # name (url) associated with it.
276 #
277 WebCacheApplication public cid_2_name { source cid } {
278         $self instvar cid_names_
279 
280         if [info exists cid_names_($source,$cid)] {
281                 set url $cid_names_($source,$cid)
282         } else {
283                 set url [srm_get_container_name $source $cid]
284         }
285         return $url
286 }
287 
288 #
289 # Give a name (url), this method returns the (source,cid) pair
290 # associated with it or an empty if it does not exist.
291 #
292 WebCacheApplication public name_2_cid { url } {
293         $self instvar source_ cid_names_
294 
295         foreach p [array names cid_names_] {
296                 if { "$cid_names_($p)" == "$url" } {
297                         return [split $p ,]
298                 }
299         }
300         return ""
301 }
302 
303 #
304 # Returns a filename if the url is local in the cache.
305 # Otherwise, an empty string is returned.
306 #
307 WebCacheApplication public hit { url } {
308         $self instvar index_
309         if [info exists index_($url)] {
310                 return $index_($url)
311         } else {
312                 return ""
313         }
314 }
315 
316 #
317 # Called to initiate the loop to access the cache. For example,
318 # from casting director in mashcast, or full page request from
319 # infocaster, or from the browser proxy.
320 #
321 WebCacheApplication public get { url { socket "" } } {
322         $self instvar index_ sockets_ proxy_ control_
323 
324         ztrace "get [gettimeofday] $url"
325         mtrace trcWC  "cache: get $url"
326 
327         # record this url request into a file if we're recording
328         if { [$self get_option record] != "" } {
329                 $self instvar recorder_
330                 $recorder_ record $url
331         }
332 
333         # remove url from stack and put on top
334         $self push_lru $url
335 
336         if { $socket != "" } {
337                 set sockets_($url) $socket
338         }
339 
340         if [info exists index_($url)] {
341                 # cache has data on local disk
342                 ztrace "done [gettimeofday] $url disk"
343                 mtrace trcWC  "cache: get from disk"
344 
345                 # callback to proxy if it is requested from there
346                 if [info exists sockets_($url)] {
347                         $proxy_ done_fetch $url $sockets_($url) $index_($url)
348                         unset sockets_($url)
349                 }
350         } else {
351                 # cache needs to get data from others
352                 mtrace trcWC  "cache: get from others"
353 
354                 set m [$self name_2_cid $url]
355                 if { $m != "" } {
356                         # if a mapping exists already, this means the
357                         # data is already in some other cache, so
358                         # we do a srm repair request on this cid.
359                         # note: we assume there is always only one adu
360                         # in any container
361 
362                         srm_recover [lindex $m 0] [lindex $m 1] 0 0
363                 } else {
364                         # the data is not in local disk nor other caches
365                         # so start a timer loop
366 
367                         $control_ create_get_timer $url
368                 }
369         }
370 }
371 
372 #
373 # Put the contents of url into the cache. The data is stored
374 # in the filename <i>fn</i>. Even though the data might not
375 # have originated from local clients, we still count it as
376 # the most recently accessed in the LRU algorithm.
377 #
378 WebCacheApplication public put { url fn { seqno 0 } } {
379         $self instvar index_ cache_dir_ index_filename_ sockets_ \
380                 used_ total_ control_
381 
382         mtrace trcWC  "cache: put $url $fn"
383 
384         # do lru business
385         $self push_lru $url
386 
387         # if the data is already on disk, don't need to store a
388         # duplicate. this situation can arise if we have to different
389         # srm names for the same url.
390         if [info exists index_($url)] {
391                 return
392         }
393 
394         # check whether there is enough space in cache
395         # if not need to make room
396 
397         set fs [file size $fn]
398 
399         if { [expr $fs + $used_] > $total_ } {
400                 # need to make room in cache
401                 $self make_room [expr $fs - ($total_ - $used_)]
402         }
403 
404         set update_index_file 1
405 
406         set name cache[clock clicks]
407         set index_($url) [file join $cache_dir_ $name[file extension $url]]
408 
409         file rename $fn $index_($url)
410 
411         # write the index file
412         if [catch {set f [open $index_filename_ a]}] {
413                 set f [open $index_filename_ w 0644]
414         }
415 
416         puts $f [list $url $index_($url)]
417         close $f
418 
419         # tell cache control to cancel timers related for this url
420         #$control_ cancel_all_timers $url
421 }
422 
423 
424 #
425 # Adjust LRU stack
426 #
427 WebCacheApplication private push_lru { url } {
428         $self instvar lru_
429 
430         # this url is the most recently used so move it to the
431         # top of the lru stack
432         set idx [lsearch $lru_ $url]
433         if { $idx != -1 } {
434                 set lru_ [lreplace $lru_ $idx $idx]
435         }
436         lappend lru_ $url
437 }
438 
439 
440 #
441 # Fetch the contents of <i>url</i> from the origin server.
442 # This is non-blocking and calls done_fetch when the
443 # transactionis finished.
444 #
445 WebCacheApplication private fetch { url } {
446         mtrace trcWC  "cache: fetching... $url"
447         set token [::http::geturl $url -command "$self done_fetch"]
448 }
449 
450 
451 #
452 # Called when start_fetch is finished. This puts the contents
453 # fetched into the cache by passing the data to the put method.
454 # Do a callback to cache control to notify the data is here.
455 #
456 WebCacheApplication private done_fetch { token } {
457         $self instvar proxy_ control_ sockets_ index_ cache_dir_
458 
459         upvar #0 $token state
460         set url $state(url)
461 
462         mtrace trcWC  "cache: done_fetch $url"
463 
464         # if we received the data via the srm session while we
465         # were fetching the data from the origin server, we can
466         # just return because recv_data would have put the data in
467         # our cache and passed the data to the browser, if necessary.
468         if [info exists index_($url)] {
469                 return
470         }
471 
472         set fn [file join $cache_dir_ tmp[clock clicks]]
473         set f [open $fn w 0644]
474         fconfigure $f -translation binary
475         puts -nonewline $f "$state(http)\r\n"
476         foreach {name value} $state(meta) {
477                 puts -nonewline $f "$name: $value\r\n"
478         }
479         puts -nonewline $f "\r\n"
480         puts -nonewline $f $state(body)
481         close $f
482 
483         # tell cache control to cancel timers related for this url
484         $control_ cancel_all_timers $url
485 
486         # put the data onto disk
487         $self put $url $fn
488 
489         # send the data to session
490         $self send_data $url
491 }
492 
493 #
494 # Uses LRU algorithm to determine which object to evict from the
495 # cache. Subclasses should define their own strategies. <i>needed</i>
496 # denotes the amount of space needed in B.
497 #
498 WebCacheApplication public make_room { needed } {
499         $self instvar index_ lru_ used_
500 
501         set cleared 0
502 
503         while { $cleared < needed } {
504 
505                 # find the lru object in cache
506                 set url [lindex $lru_ 0]
507                 set fn $index_($url)
508                 set fs [file size $fn]
509 
510                 set cleared [expr $cleared + $fs]
511 
512                 $self flush_url $url
513         }
514 
515 }
516 
517 WebCacheApplication private flush_url { url } {
518         $self instvar index_ lru_ used_
519 
520         # remove the file and clean up cache index
521         file delete $fn
522         unset index_($url)
523         set lru_ [lrange $lru_ 1 end]
524 
525         set used_ [expr $used_ - $fs]
526 }
527 
528 WebCacheApplication public flush_all { } {
529         $self instvar index_ cache_dir_
530         file delete -force -- [glob -nocomplain [file join $cache_dir_ *]]
531         catch {unset index_}
532 }
533 
534 WebCacheApplication private create_dir { path } {
535         if { ![file isdirectory $path] } {
536                 set dir ""
537                 foreach split [file split $path] {
538                         set dir [file join $dir $split]
539                         if { ![file exists $dir] } {
540                                 # this command will cause an error
541                                 # if it is not possible to create the dir
542                                 file mkdir $dir
543                         }
544                 }
545         }
546 }
547 
548 #
549 # SRMv2 tcl callbacks
550 #
551 
552 # FIXME
553 set fn {/tmp/recv.dat}
554 
555 proc srm_recv { src cid seqno data } {
556         global cache fn
557 
558         set f [open $fn w 0644]
559         fconfigure $f -translation binary
560         puts -nonewline $f $data
561         close $f
562 
563         $cache recv_data $src $cid $seqno $fn
564 }
565 
566 proc srm_read_adu { src cid seqno } {
567         global cache
568 
569         set fn [$cache read_data $src $cid $seqno]
570 
571         if { $fn == "" } {
572                 return ""
573         } else {
574                 set f [open $fn]
575                 fconfigure $f -translation binary
576                 set buffer ""
577                 while { ![eof $f] } {
578                         append buffer [read $f 4096]
579                 }
580                 close $f
581 
582                 return $buffer
583         }
584 }
585 
586 proc srm_should_recover { src cid sseq eseq } {
587         global cache
588         $cache should_recover $src $cid $sseq $eseq
589 }
590 
591 
592 proc srm_recv_cid { src cid parent_cid name } {
593         global cache
594         $cache recv_cid $src $cid $parent_cid $name
595 }
596 
597 proc srm_source_update { src info } {
598         # do nothing now
599 }
600 
601 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.