#!/usr/bin/csh -f
############################################################################
#
# AFTERNOON.sh --
#
# AUTHOR: Andria Hunter (andria@sys.toronto.edu)
# DATE:   March 1995
#
# DESC:
#    This c-shell script is submitted as a "crontab" job at 2pm each
#    afternoon.  It runs statbot and does some fixing up of my HTML
#    files, and then it waits 2 hours before running statbot again.
#    This is repeated at 2 hour intervals (using sleep) until 10pm
#    in the evening.
#
#    The reason that I don't start it until 2pm is that the daily page
#    count totals for the previous day are usually more interesting up
#    until this time.  It also saves system resources by running for
#    only a limited period of time.
#
# USAGE:
#    Submit this script as a crontab entry...
#      0 14 * * * /usr/bin/csh -c /h/d/andria/public_html/statbot/AFTERNOON.sh
#
#    The output from the crontab is mailed to me each day.
#
#    If for some reason this script stops, or you need to stop it, you can
#    cancel the crontab by killing the processes, and you can restart it
#    from the statbot directory with:
#
#      AFTERNOON.sh >& AFTERNOON.log &
#
# NOTE:
#    The file called MORNING.sh is also submitted as a crontab job.
#    It starts running at 6am each morning, and usually takes about
#    two minutes to finish.  On Monday mornings it takes longer
#    because it must update the page_stats_each.html file, which
#    contains an access count for each page.
#
# CHANGES:
#    Sept 11, 1996:
#      - Upgraded to Statbot 4.0.  Some of the updates are no
#        longer necessary.  "del.fix_ps_uniq" is no longer needed.
#    May 17, 1996:
#      - Now we have large backup files (Mar_Aug_95, Sep_Dec_95, etc.)
#        instead of just the httpd-xfer.andria file.  These files must
#        be kept in compressed form because they are so large.
#        One record is kept in each of the large backup files
#        (FTPLOG) because statbot expects these files to exist.
#        However, these files are really stored in the LRGLOG
#        directory.  This directory must NOT contain any files
#        besides these log files, because we use "*" to match
#        the file names in this directory.
#    Apr 10, 1996:
#      - The statbot program is no longer called at 55 minute
#        intervals.  The main loop of this script now executes
#        at 2 hour intervals.  This script now terminates if it
#        is 8:00pm or later.  This means that the daily counts
#        will not be as high as they might otherwise be at the
#        end of the day.
#    April 20, 1995:
#      - Modified so that this script may be executed on any host, not
#        just the www server, kipling.cs.  It now copies the files from
#        the directory "/var/log/" on kipling.cs using "rsh".
#
############################################################################

############################################################################
# **NB** Please check these variables before running this script...
############################################################################

# Time between updates to statbot pages... (55mins=3300secs=55*60)
# (3300 for 55 minutes between updates; 7200 for 2 hours)
## set SHORT_INTERVAL = "3300"
set SHORT_INTERVAL = "7200"

# If the current hour is this, then exit the script... (10:00 pm)
set LAST_UPDATE_HR = "22"

# Time between updates to statbot pages... (16hrs=57600secs=16*60*60)
# (16 hours between updates) (add 30 secs) -- no longer needed since it
# is now submitted as a crontab each day.
## set LONG_INTERVAL = "57630"
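# A minimal sanity-check sketch (not part of the original flow): derive
# SHORT_INTERVAL from an hour count so the comments and the value cannot
# drift apart.  NUM_HOURS is a hypothetical helper variable, used nowhere
# else in this script.
## @ NUM_HOURS = 2
## @ SHORT_INTERVAL = $NUM_HOURS * 60 * 60    # 7200 seconds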
# Statbot directory...
set SBDIR = "~andria/public_html/statbot/"

# Homepage directory ...
set HPDIR = "~andria/public_html/"

# Page Stats HTML file directory...
set PSDIR = "~andria/public_html/page_stats/"

# My directory for log files ...
#set MYLOG = "/cs/ftp/pub/issac/test/log/"
set MYLOG = "~andria/public_html/log/"

# My directory for large backup log files ...
#set LRGLOG = "/cs/ftp/pub/issac/test/log/large_backup/"
set LRGLOG = "~andria/public_html/log/large_backup/"

# My directory for ftp log files ...
set MYFTP = "~andria/public_html/ftp/"

# My directory for backup statbot pagestat files ...
set PSBAK = "~andria/public_html/ps_files/"

# My directory for unique host count for each day ...
set UNIDIR = "~andria/public_html/unique/"

# My directory for counts for all pages ...
set CNTDIR = "~andria/public_html/all_pages/"

############################################################################

cd ${SBDIR}

# Uncompress files containing all httpd records
# \gunzip ${MYLOG}httpd-xfer.andria.gz

echo "Starting AFTERNOON.sh on "`hostname`" ("`date`")"
echo "==================================================="
echo " "
echo "Disable Script while manually running statbot..."
echo "(Do not forget to recompress large_backup files)"
echo " "
exit    # NOTE: this early exit disables the loop below; remove it (and
        # the echoes above) to re-enable the script.

while (1)

#########################################################################
# This while loop is repeated every 2 hours (see SHORT_INTERVAL)
#########################################################################

echo ""
echo "----------- Starting statbot ("`date`") -----------------"

# Set constants for day/time
set TODAY_DAY   = `\date +%d`
set TODAY_MON   = `\date +%b`
set TODAY_MONTH = `\date +%B`
set TODAY_YEAR  = `\date +%Y`
set TODAY_TIME  = `\date +%r | cut -c1-5,9-`

# Remove old files
\rm ${SBDIR}uniq_hp*.txt*
\rm ${SBDIR}uniq_site*.txt*
\rm ${SBDIR}.fix_index_html
\rm ${SBDIR}.fix_page_stats_html
\rm ${MYLOG}httpd-xfer

#########################################################################
# Get a new copy of the host log file...
# Make copies of the log files.  Since we are not logged into kipling.cs,
# we must get a copy using the "rcp" command, and store it in a
# temporary file.
#########################################################################
rcp andria@kipling.cs:/var/log/httpd-xfer /tmp/del.httpd-xfer
grep andria /tmp/del.httpd-xfer | \
    grep -v puck.cdf | grep -v dvp.cs | grep -v univac.sys | \
    grep -v 128.100.3.9 | grep -v 128.100.2.233 | grep -v Count.cgi | \
    grep -v gifs | grep -v flags | grep -v Small_click_images > \
    /tmp/httpd-xfer.myrecords
\rm /tmp/del.httpd-xfer
\mv /tmp/httpd-xfer.myrecords ${MYLOG}httpd-xfer

#########################################################################
# Run the statbot program to create new "page_stats.html" file...
#########################################################################
nice +19 statbot

echo ""
echo "Updating page_stats.html, index.html, index_bg.html, & plain.html ..."

#########################################################################
# Summary of "txt" files created in SBDIR...
#   ---> uniq_hp_visits.txt
#   ---> uniq_hp_visits_today.txt
#   ---> uniq_site_visits.txt
#   ---> uniq_site_visits_today.txt
#
# Some of these files will be used in the later section which determines
# the actual changes which will be made to index.html.  Others are just
# created in case I ever want to look at them.
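#
# A hypothetical example of what the pipelines below do (the real rewrite
# rules live in the .get_host_name sed script; this log line is invented):
#
#   input : some.host.edu - - [15/Mar/1997:10:02:11 -0500] "GET /~andria/ HTTP/1.0" 200 4913
#   output: some.host.edu
#
# Reducing each record to just its host name first means that "sort -u"
# then counts every visiting host exactly once.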
#########################################################################

# + + + + + + Make temp files (for today) (uniq_hp_visits_today.txt) + + + + + +

# For today: Visits to homepage only (not unique)
cat ${MYLOG}httpd-xfer ${MYLOG}httpd-xfer.andria | \
    grep "\[${TODAY_DAY}\/${TODAY_MON}\/${TODAY_YEAR}" | \
    egrep '\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria ' > \
    ${SBDIR}del.hp_visits_today

# For today: Visits to any page (not unique)
cat ${MYLOG}httpd-xfer ${MYLOG}httpd-xfer.andria | \
    grep "\[${TODAY_DAY}\/${TODAY_MON}\/${TODAY_YEAR}" > \
    ${SBDIR}del.site_visits_today

# For today: Visits to homepage only (unique)
cat ${SBDIR}del.hp_visits_today | sed -f ${SBDIR}.get_host_name | \
    sort -u > ${SBDIR}uniq_hp_visits_today.txt

# + + + + + + File for homepage only (uniq_hp_visits.txt) + + + + + +

# For all days: Visits to homepage only (unique) -- use zcat for backups
# FIX
cat ${MYLOG}httpd-xfer ${MYLOG}httpd-xfer.andria | \
    egrep '\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria ' | \
    sed -f ${SBDIR}.get_host_name | sort -u > ${SBDIR}del.delete
zcat ${LRGLOG}httpd-xfer.*.gz | \
    egrep '\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria ' | \
    sed -f ${SBDIR}.get_host_name | sort -u >> ${SBDIR}del.delete
sort -u ${SBDIR}del.delete > ${SBDIR}uniq_hp_visits.txt
\rm ${SBDIR}del.delete

# + + + + File for visits to any page today (uniq_site_visits_today.txt) + + + +

# For today: Visits to any page (unique)
cat ${SBDIR}del.site_visits_today | \
    sed -f ${SBDIR}.get_host_name | \
    sort -u > ${SBDIR}uniq_site_visits_today.txt

# + + + + + + File for visits to any page (uniq_site_visits.txt) + + + + + +

# For all days: Visits to any page (unique) -- use zcat for backups
# FIX
cat ${MYLOG}httpd-xfer ${MYLOG}httpd-xfer.andria | \
    grep "andria" | sed -f ${SBDIR}.get_host_name | \
    sort -u > ${SBDIR}del.delete
zcat ${LRGLOG}httpd-xfer.*.gz | \
    grep "andria" | sed -f ${SBDIR}.get_host_name | \
    sort -u >> ${SBDIR}del.delete
sort -u ${SBDIR}del.delete > ${SBDIR}uniq_site_visits.txt
\rm ${SBDIR}del.delete
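# Note on the two zcat steps above: the large backups (httpd-xfer.Mar_Aug_95.gz,
# etc.) stay compressed on disk (see the May 17, 1996 change note), so zcat
# streams their records without ever expanding the files.  For example, a
# rough one-off count of my records across all backups would be:
##   zcat ${LRGLOG}httpd-xfer.*.gz | grep andria | wc -l
# (illustrative only; not executed by this script)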
#########################################################################
# Update these lines in my index.html homepage...
#
# These temporary filenames really should be more meaningful!  ;-)
#
# ---> Total visits to Andria's Homepage: 199                    (fix1)
# ---> Unique visits to Andria's Homepage: 107                   (fix2)
# ---> Unique web site visits: 301                               (fix3)
# ---> Total web site visits: 4615                               (fix4)
# ---> Today: Total web site visits: 14
#      (from 12:00 AM EST to 3:31 PM on Mar 15)                  (fix5)
# ---> Today: Unique web site visits: 8                          (fix6)
# ---> Today: Total visits to Andria's Homepage: 22
#      (from 12:00 AM EST to 06:35 PM on March 21)               (fix7)
# ---> Today: Unique visits to Andria's Homepage: 16             (fix8)
#########################################################################

# --- fix1 (total visits to my homepage) ---
echo `egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " ${MYLOG}httpd-xfer | wc | cut -c1-8` + \
     `egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " ${MYLOG}httpd-xfer.andria | wc | cut -c1-8` > \
     ${SBDIR}del.sum1
echo `zcat ${LRGLOG}httpd-xfer.Mar_Aug_95.gz | egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " | wc | cut -c1-8` + \
     `zcat ${LRGLOG}httpd-xfer.Sep_Dec_95.gz | egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " | wc | cut -c1-8` + \
     `zcat ${LRGLOG}httpd-xfer.Jan_Feb_96.gz | egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " | wc | cut -c1-8` + \
     `zcat ${LRGLOG}httpd-xfer.Mar_Apr_96.gz | egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " | wc | cut -c1-8` + \
     `zcat ${LRGLOG}httpd-xfer.May_Jun_96.gz | egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " | wc | cut -c1-8` + \
     `zcat ${LRGLOG}httpd-xfer.Jul_Aug_96.gz | egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " | wc | cut -c1-8` + \
     `zcat ${LRGLOG}httpd-xfer.Sep_Oct_96.gz | egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " | wc | cut -c1-8` + \
     `zcat ${LRGLOG}httpd-xfer.Nov_Dec_96.gz | egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " | wc | cut -c1-8` + \
     `zcat ${LRGLOG}httpd-xfer.Jan_Apr_97.gz | egrep "\/\~andria\/ |\/\~andria | \/\%7Eandria\/ | \/\%7Eandria " | wc | cut -c1-8` > \
     ${SBDIR}del.sum2
echo `cat ${SBDIR}del.sum1` + `cat ${SBDIR}del.sum2` > ${SBDIR}del.sum
\rm ${SBDIR}del.sum1 ${SBDIR}del.sum2
echo "Total visits to Andria\'s Homepage: " \
     `bc < ${SBDIR}del.sum`"/" > ${SBDIR}del.tmp1
echo "s/^Total visits to.*/"`cat ${SBDIR}del.tmp1` > ${SBDIR}del.fix1

# --- fix2 (unique visits to my homepage) ---
echo "Unique visits to Andria\'s Homepage: " \
     `wc ${SBDIR}uniq_hp_visits.txt | cut -c1-8`"/" > ${SBDIR}del.tmp2
echo "s/^Unique visits to.*/"`cat ${SBDIR}del.tmp2` > ${SBDIR}del.fix2

# --- fix3 (unique web site visits) ---
echo "s/^Unique web.*/Unique web site visits: " \
     `wc ${SBDIR}uniq_site_visits.txt | cut -c1-8`"/" > \
     ${SBDIR}del.fix3

# --- fix4 (total web site visits) ---
echo "s/^Total web.*/"`grep "Total Web site visits:" page_stats.html`/ > \
     ${SBDIR}del.fix4
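# How the two-step construction above works (numbers invented for
# illustration; del.tmp1/del.fix1 are the real file names):
#
#   del.tmp1 holds the replacement text plus sed's closing delimiter:
#       Total visits to Andria's Homepage:      4615/
#   del.fix1 then holds a complete sed substitute command:
#       s/^Total visits to.*/Total visits to Andria's Homepage:      4615/
#
# All of the del.fix[1-8] files are concatenated (via the .fix_backslash
# cleanup) into .fix_index_html further below, and applied to index.html
# with a single "sed -f".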
#########################################################################
# Update count for Andria's Homepage accesses in page_stats.html...
# ---> Add "(Canada)" to hosts known to be located in Canada.
#      (See the .fix_countries for changes)
# ---> <LI><B>Andria's Homepage: 375</B> Visits Today (4228 Total)  (fix_ps/fix1)
#        Visits Today (stored in fix_ps)
#        Total        (already stored in fix1) (not unique visits)
#########################################################################

# --- fix_ps (visits to my homepage today) ---
echo "<LI><B>Andria's Homepage: "`wc ${SBDIR}del.hp_visits_today | cut -c1-8`\
     "<\/B> Visits Today ("`bc < ${SBDIR}del.sum`\
     " Total)<BR>&nbsp;&nbsp;&nbsp;&nbsp; (Andria\'s Homepage --- "\
     `wc ${SBDIR}uniq_hp_visits_today.txt | cut -c1-8`\
     " unique today, <B>"`wc ${SBDIR}uniq_hp_visits.txt | cut -c1-8`\
     "<\/B> unique total)/" > ${SBDIR}del.tmp3
echo "s/<LI><B>Andria's Homepage:.*/"`cat ${SBDIR}del.tmp3` > ${SBDIR}del.fix_ps
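# In the four substitutions below, the pattern "Today<.B>:" uses sed's "."
# to match the "/" of the literal "Today</B>:" text in the HTML pages,
# which avoids one more layer of slash-escaping inside the s/.../.../
# commands.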
# --- fix5 (total web site visits today) ---
echo "Today<\/B>: Total web site visits: <B>"\
     `wc ${SBDIR}del.site_visits_today | cut -c1-8`\
     "<\/B><BR>&nbsp;&nbsp;&nbsp;&nbsp; (from 12:00 AM EST to "\
     `date +%r | cut -c1-5,9-`\
     " on "${TODAY_MONTH}" "${TODAY_DAY}")/" > \
     ${SBDIR}del.tmp5
echo "s/^Today<.B>: Total web.*/"`cat ${SBDIR}del.tmp5` > \
     ${SBDIR}del.fix5

# --- fix6 (unique web site visits today) ---
echo "Today<\/B>: Unique web site visits: <B>"\
     `wc ${SBDIR}uniq_site_visits_today.txt | cut -c1-8`\
     "<\/B>/" > ${SBDIR}del.tmp6
echo "s/^Today<.B>: Unique web.*/"`cat ${SBDIR}del.tmp6` > \
     ${SBDIR}del.fix6

# --- fix7 (total visits to my homepage today) ---
echo "Today<\/B>: Total visits to Andria\'s Homepage: <B>"\
     `wc ${SBDIR}del.hp_visits_today | cut -c1-8`\
     "<\/B><BR>&nbsp;&nbsp;&nbsp;&nbsp; (from 12:00 AM EST to "\
     `date +%r | cut -c1-5,9-`\
     " on "${TODAY_MONTH}" "${TODAY_DAY}")/" > \
     ${SBDIR}del.tmp7
echo "s/^Today<.B>: Total visits.*/"`cat ${SBDIR}del.tmp7` > \
     ${SBDIR}del.fix7

# --- fix8 (unique visits to my homepage today) ---
echo "Today<\/B>: Unique visits to Andria\'s Homepage: <B>"\
     `wc ${SBDIR}uniq_hp_visits_today.txt | cut -c1-8`"<\/B>/" > \
     ${SBDIR}del.tmp8
echo "s/^Today<.B>: Unique visits.*/"`cat ${SBDIR}del.tmp8` > \
     ${SBDIR}del.fix8
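# For reference, a finished del.fix5 looks roughly like this (count and
# time invented for illustration):
#   s/^Today<.B>: Total web.*/Today<\/B>: Total web site visits: <B> 14 <\/B><BR>&nbsp;&nbsp;&nbsp;&nbsp; (from 12:00 AM EST to 03:31 PM on March 15)/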
    " \ "Unique Total Web site visits: " \ `wc ${SBDIR}uniq_site_visits.txt | cut -c1-8`\ "<\/B>

    " \ "Web site visits today:/" > ${SBDIR}del.fix_ps_uniq # Keep large files compressed... \gzip -9 ${SBDIR}uniq_hp_visits.txt \gzip -9 ${SBDIR}uniq_site_visits.txt # --- add link to hostnames file --- echo 's/Today.*$/Visitor List for Today<\/A><\/B>/' > ${SBDIR}del.fix_hostnames ######################################################################### # Now make the actual updates to page_stats.html and index.html... # # Notice that the substitution is made to a temporary file (using sed), # and then the "cp" command is used to update the actual html page. This # minimizes the chance of a user requesting one of my web pages, as it # is being updated. ######################################################################### # --- update page_stats.html --- # NOTE: We used to also cat the "del.fix_ps_uniq" file in the # following line, but this isn't necessary with Statbot 4.0. cat ${SBDIR}.fix_countries ${SBDIR}del.fix_ps ${SBDIR}del.fix_hostnames > \ ${SBDIR}.fix_page_stats_html cat ${SBDIR}page_stats.html | sed -f ${SBDIR}.fix_page_stats_html > \ ${SBDIR}del.tmp4 \mv ${SBDIR}del.tmp4 ${PSDIR}page_stats.html chmod a+r ${HPDIR}*.html chmod a+r ${PSDIR}*.html chmod a+r ${PSDIR}*.gif chmod a+r ${SBDIR}*.txt # --- update index.html --- cat ${SBDIR}del.fix[1-8] | sed -f ${SBDIR}.fix_backslash > ${SBDIR}.fix_index_html cat ${HPDIR}index.html | sed -f ${SBDIR}.fix_index_html > ${SBDIR}del.ndx \cp ${SBDIR}del.ndx ${HPDIR}index.html # --- update plain.html (same as index.html, but no graphics) --- cat ${HPDIR}plain.html | sed -f ${SBDIR}.fix_index_html > ${SBDIR}del.pln \cp ${SBDIR}del.pln ${HPDIR}plain.html # --- update index_bg.html (same as index.html, but background) --- cat ${HPDIR}index_bg.html | sed -f ${SBDIR}.fix_index_html > ${SBDIR}del.bg \cp ${SBDIR}del.bg ${HPDIR}index_bg.html # --- update page_stats_sum.html --- (same as updates to index.html) cat ${HPDIR}page_stats_sum.html | sed -f ${SBDIR}.fix_index_html > \ ${SBDIR}del.ps_sum \cp ${SBDIR}del.ps_sum ${PSDIR}page_stats_sum.html ###################################################################### # Decide how long until the next update... # If the time is between 14:00-00:00 and 00:00-1:00 then sleep 2 hours. # Otherwise sleep for 12 hours... # # (This is no longer needed, since we now have a separate script which # we run in the morning (see MORNING.sh), and since this script is # resubmitted as a crontab each day. Now we always wait 55 mins # until 11pm.) ###################################################################### # code for the above comment has been removed ###################################################################### # Make a file containing the names of all hosts which visited today # Lookup the IP numbers using nslookup (LOOKUP.sh). ###################################################################### ${SBDIR}LOOKUP.sh >& ${SBDIR}del.hostnames \rm ${HPDIR}page_stats_hostnames.html cat ${SBDIR}del.hostnames | sed -f ${SBDIR}.today_host_sub_after | \ sed -f ${SBDIR}.today_country_sub1 | \ sed -f ${SBDIR}.today_country_sub2 | \ sed -f ${SBDIR}.today_country_sub3 > \ ${HPDIR}page_stats_hostnames.html chmod go+r ${HPDIR}page_stats_hostnames.html ###################################################################### # Clean up temporary files... # Be explicit when deleting these files so that it doesn't # interfere with MORNING.sh on Monday Mornings... 
######################################################################
\rm ${SBDIR}del.fix1 ${SBDIR}del.fix2 ${SBDIR}del.fix3 ${SBDIR}del.fix4
\rm ${SBDIR}del.fix5 ${SBDIR}del.fix6 ${SBDIR}del.fix7 ${SBDIR}del.fix8
\rm ${SBDIR}del.bg ${SBDIR}del.fix_hostnames ${SBDIR}del.fix_ps
\rm ${SBDIR}del.fix_ps_uniq ${SBDIR}del.hp_visits_today ${SBDIR}del.ndx
\rm ${SBDIR}del.pln ${SBDIR}del.ps_sum ${SBDIR}del.site_visits_today
\rm ${SBDIR}del.sum ${SBDIR}del.tmp1 ${SBDIR}del.tmp2 ${SBDIR}del.tmp3
\rm ${SBDIR}del.tmp5 ${SBDIR}del.tmp6 ${SBDIR}del.tmp7 ${SBDIR}del.tmp8

######################################################################
# Really I should just check for >=14 and <=22, but by using >=08 it
# also handles the case where this script is started up before 2pm.
# (Not a problem any longer, since it is now submitted as a cron job.)
######################################################################
set CURR_HOUR = `\date +%H`
set TODAY_DAY = `\date +%d`
set TODAY_MON = `\date +%b`

echo ""
echo "Current hour is ${CURR_HOUR}"

if ( ${CURR_HOUR} >= "08" && ${CURR_HOUR} <= ${LAST_UPDATE_HR} ) then
  echo "Sleeping for ${SHORT_INTERVAL} seconds, starting at `date`..."
  sleep ${SHORT_INTERVAL}
else
  # Make a file which stores the count of the number of unique hosts
  # which visited today.
  grep ": Unique web site visits:" ${HPDIR}index.html > \
      ${UNIDIR}uniq.${TODAY_MON}${TODAY_DAY}
  chmod go+r ${UNIDIR}uniq.${TODAY_MON}${TODAY_DAY}

  # Store names of all countries that have visited (in hostnames.html).
  # Make a link to this page from page_stats_hostnames.html.
  \rm ${SBDIR}hostnames.html
  grep "(.*)" ${HPDIR}page_stats_hostnames.html | \
      sed -f ${SBDIR}.get_today_countries | \
      sort -u > ${SBDIR}del.today_countries
  cat ${SBDIR}del.today_countries ${SBDIR}uniq_countries.txt | \
      sort -u > ${SBDIR}del.total_countries
  \cp ${SBDIR}del.total_countries ${SBDIR}uniq_countries.txt
  cat ${SBDIR}open_hostnames.txt ${SBDIR}uniq_countries.txt \
      ${SBDIR}close_hostnames.txt > ${SBDIR}hostnames.html
  \cp ${SBDIR}hostnames.html ${PSDIR}hostnames.html

  # Compress Log file at the end of each day, and then exit script
  # \gzip -9 ${MYLOG}httpd-xfer.andria

  echo "It is now after 10pm, so exit this script..."
  echo "The other script (MORNING.sh) kicks in at 6am tomorrow morning..."
  echo " "
  echo "Finished AFTERNOON.sh on "`\hostname`" ("`date`")"
  exit
endif

end