%!

% PS WEB SITE LOG ANALYZER AND REPORTER
% ==============================================
% Copyright c 2003 by Don Lancaster and Synergetics, Box 809, Thatcher, AZ, 85552
% (428) 428-4073 don@tinaja.com http://www.tinaja.com
% Consulting services available per http://www.tinaja.com/info01.html 

% ---- Per-run configuration ----
% shortsourcefilename is the bare log file name; it is appended to
% sourcefilenameprefix further down to form the full sourcefilename.
/shortsourcefilename (u_ex100701.log) store

% Directory holding the log file. Keep the trailing "\\" because the two
% strings are simply concatenated.
/sourcefilenameprefix (C:\\Documents and Settings\\Don 2\\Desktop\\log_play\\) store % the path of the input file



% NOTE(review): addtomonth and dayprocfilename are not referenced anywhere in the
% visible part of this file - presumably used by the monthly add-on; confirm.
/addtomonth false store  % add this pass to monthly results?

/dayprocfilename (C:\\WINDOWS\\Desktop\\log_play\\trailing_stats\\dayproc1.psl)store

% All commercial rights and all electronic media rights fully reserved.
% Personal use permitted provided header and entire file remains intact.
% Linking welcome. Reposting expressly forbidden.

% version 2.06  start of monthly add-on

% This specialized PostScript-as-language routine reads a web logfile of a specified
% format, extracts useful data to arrays, and then processes those arrays for analysis.

% Included is an eBay photo frequency lister, usable for instant evaluation of
% item popularity as well as for image piracy detection.

% Also included is a viewer log system that quickly shows you which pages each
% and every individual viewer visited and which files they downloaded.

% New arrays are then created containing data for monthly or extended analysis.

% To use this program, enter the full path sourcefile and target file names 
% and keyphrase below and resave AS A STANDARD ASCII TEXTFILE. Then send to
% Acrobat Distiller. Then view log file.

% Note that a NO FILE PRODUCED message is normal and expected.

% IMPORTANT: Be sure to use "\\" when you mean "\" in a PostScript string!
%            Be sure to remove any ending nulls from your log file.

% IMPORTANT: Be sure there are no trailing nulls or other block characters at
%            the end of your logfile.

% IMPORTANT: Be sure logfile format agrees with your processing sequence!
%            Processing assumes a SINGLE s-ip url in the sourcefile.

%  IMPORTANT NOTE: Don Lancaster's file gonzo.ps is required for this program.
%  After obvious location mods, uncomment ONE of the following two lines:




  % Load the gonzo utilities from disk. The path must match this machine.
  (C:\\Documents and Settings\\Don\\Desktop\\Gonzo\\gonzo.ps) run  % use internal gonzo


%  (A:\\gonzo.ps) run  % use external gonzo

% guru pushes the gonzo, ps.util.1 and nuisance dictionaries onto the dictionary
% stack (calling printerror in between). They are never popped in this file, so
% later "def" operations land in the nuisance dictionary.
/guru { gonzo begin
ps.util.1 begin printerror nuisance begin} def   
guru                                           % activate gonzo utilities



% Full input path = prefix + short name. mergestr is not defined in this file;
% presumably it is gonzo's string concatenation helper.
/sourcefilename sourcefilenameprefix shortsourcefilename 
 mergestr store



% makestring converts a stack top array into a string...
 
/makestring {
    dup length string                    % array str    - buffer, one byte per element
    exch 1 index /NullEncode filter      % str array file - write-only file onto the buffer
    exch                                 % str file array
    { 1 index exch write } forall        % append every element as one byte
    pop                                  % drop the filter file, leaving the string
} def

%%%%%%%%%%% Specify log and destination files here %%%%%%%%%%%%%%%%%%%%


% /sourcefilename (C:\\Documents and Settings\\Bee\\Desktop\\log_play\\ex031227.log) def    % the name of the input file

% destination file is not yet in use. It should be handy to pass data to monthly or
% other extended time routines...

% Output file name reserved for passing data on to monthly routines. It is only
% defined here; nothing in the visible part of this file opens or writes it.
/targetfilename (C:\\WINDOWS\\Desktop\\log_play\\curday.txt) def    % extracted referral output file

%%%%%%%%%%%%%

% Shared line buffer handed to readline. A log line longer than 20000 characters
% will not fit in it.
/workstring 20000 string def

% Assumed log file format is preceded by four # comment lines. Each live entry consists
% of 17 space delimited printable ASCII strings in the following sequence...

      % date 
      % time 
      % s-ip 
      % cs-method 
      % cs-uri-stem 
      % cs-uri-query 
      % s-port 
      % cs-username 
      % c-ip 
      % cs(User-Agent) 
      % cs(Referer) 
      % sc-status 
      % sc-substatus 
      % sc-win32-status 
      % sc-bytes 
      % cs-bytes 
      % time taken

% Dynamic array allocation is probably too slow, so a scanned size system is used here.
% Present size should handle log files of 6 megs or less, depending on host.

/arraysize 0 store    % number of live log lines; counted by findarraylength

% definesubarrays allocates one fresh array of arraysize elements for each of
% the seventeen log fields and stores it under the field's array name.
% Takes nothing from the stack and leaves nothing on it.

/definesubarrays {
    [
        /datearray      % date
        /timearray      % time
        /s-iparray      % s-ip
        /cs-method      % cs-method
        /cs-uri-stem    % cs-uri-stem
        /cs-uri-query   % cs-uri-query
        /s-port         % s-port
        /cs-username    % cs-username
        /c-ip           % c-ip
        /cs-useragent   % cs(User-Agent)
        /cs-referer     % cs(Referer)
        /sc-status      % sc-status
        /sc-substatus   % sc-substatus
        /sc-win32ss     % sc-win32-status
        /sc-bytes       % sc-bytes
        /cs-bytes       % cs-bytes
        /time-taken     % time-taken
    ]
    { arraysize array store } forall     % one separate array per field name
} store

%%%%%%%%% (A) Strip log file to individual arrays %%%%%%%%%%%%%%%%

/linecount -1 store   % index of the last stored log line; gets bumped to zero on first pass
/filelength 0 store   % file position recorded after the last valid line

% logfileerror reports a log line that has too few space delimited fields.
% It echoes the offending line (curline), then prints the zero-based linecount,
% then invokes formaterror to halt the run.
%
% Fix: the message pieces are now joined with mergestr before printing.
% Previously only (.\n\n) was printed and the heading and line number were
% left behind on the operand stack.
%
% NOTE(review): formaterror is not defined in the visible source; per the
% original "hang on bad format" remark it is presumably meant to stop the job.

/logfileerror {
    (----> ) print  curline ==  (\n\n) print flush

    (Format error on line ) linecount 20 string cvs mergestr
    (.\n\n) mergestr print flush

    formaterror                                  % hang on bad format
} store

% Note that PostScript string dereferencing is essential, as reuse of a string will
% trash pointers in the arrays.


% Announce which log file is about to be analyzed (header for the report).
(\n\nBeginning analysis of web log file ) print
sourcefilename print
(.\n\n\n) print flush


% splitintoarrays cuts curline at its first sixteen spaces and files each piece
% into slot linecount of the matching field array. Whatever follows the
% sixteenth space goes to time-taken. Every piece is copied into a fresh string
% ("dereferenced") because curline lives in the reused read buffer.
% If a space is missing, logfileerror is called and the unparsed remainder is
% left on the stack, exactly as the nested original did.

/splitintoarrays {
    curline                               % unparsed remainder of the line
    false                                 % "delimiter missing" flag

    % field array names in log order; a procedure body is used as a constant
    % list so that nothing is allocated per line
    { /datearray /timearray /s-iparray /cs-method /cs-uri-stem /cs-uri-query
      /s-port /cs-username /c-ip /cs-useragent /cs-referer /sc-status
      /sc-substatus /sc-win32ss /sc-bytes /cs-bytes }

    {                                     % rest flag name
        load 3 1 roll pop                 % fieldarray rest
        ( ) search
        {                                 % fieldarray post match pre
            exch pop                      % fieldarray post pre    - dump space
            dup length string cvs         % fieldarray post copy   - dereference!!!!
            3 -1 roll exch                % post fieldarray copy
            linecount exch put            % post
            false                         % post false
        }{                                % fieldarray rest
            exch pop true exit            % rest true   - stop at first bad field
        } ifelse
    } forall

    { logfileerror }
    { dup length string cvs               % dereference!!!!
      time-taken exch linecount exch put  % default remainder
    } ifelse
} store

% processline strips out short lines or those starting with a # comment delimiter     

/processline {
    /curline exch store                        % line string handed in by the read loop

    curline length 5 gt                        % skip very short lines as errors
        { curline 0 get 35 ne }                % 35 is ASCII "#": skip comment lines
        { false }
    ifelse
    {
        /linecount linecount 1 add store       % slot index for this line

        linecount 1 add dup 2500 mod 0 eq      % progress note every 2500 lines
            { (Processed ) exch 20 string cvs mergestr
              ( log lines\n) mergestr print flush }
            { pop }
        ifelse

        /filelength workfile fileposition store   % position only if line valid
        splitintoarrays
    } if
} def


% /findarraylength makes a preliminary pass through the log file to determine the
% number of live log lines that need actual processing. This avoids a fixed array
% size definition or the overhead of dynamic array size allocation.

/findarraylength {
    /workfile sourcefilename (r) file def      % open the log for reading

    {
        mark
        workfile workstring readline           % read one line at a time
        not { cleartomark exit } if            % end of file: tidy up and leave

        /curline0 exch store
        curline0 length 5 gt                   % skip very short lines as errors
            { curline0 0 get 35 ne }           % 35 is ASCII "#": skip comment lines
            { false }
        ifelse
        { /arraysize arraysize 1 add store } if   % one count per useful line

        cleartomark                            % just in case sloppy
    } loop

    % arraysize ==                             % optional check of array length
    definesubarrays                            % allocate field arrays at counted size
} def

% /striplogfile reads the logfile one line at a time and passes the results to
% processline for array allocation. This is the main reading loop

/striplogfile {
    /workfile sourcefilename (r) file def      % reopen the log for the data pass

    {
        mark
        workfile workstring readline           % read one line at a time
        not { cleartomark exit } if            % end of file: tidy up and leave
        processline                            % file the line into the field arrays
        cleartomark                            % just in case sloppy
    } loop
} def


% Run the two passes over the log file, each bracketed by stopwatchon and
% stopwatchoff, then report the totals.
% NOTE(review): stopwatchon/stopwatchoff are not defined in this file -
% presumably gonzo timing helpers.

(\nStarting array size count..\n) print flush

stopwatchon
findarraylength                 % pass 1: count live lines, allocate arrays
stopwatchoff

(\nStarting array splitout..\n\n) print flush

stopwatchon
striplogfile                    % pass 2: this does it
stopwatchoff


(\nLogfile length was approximately ) print
filelength 68 add 20 string cvs print
( characters.\n\n) print flush

(Total hits for session are ) print
linecount 1 add 20 string cvs print          % linecount is zero based
(.\n) print flush


%%%%%%%%%% Utility I -- OPTIONAL ARRAY VIEWER %%%%%%%%%%%

/wanttoseearrays false store   % change to true to view

% Dump the field arrays with == when the switch above is true.
% NOTE(review): cs-useragent is not in the dump list; unclear whether that is
% deliberate.

wanttoseearrays {
    [ datearray timearray s-iparray cs-method cs-uri-stem cs-uri-query
      s-port cs-username c-ip cs-referer sc-status sc-substatus
      sc-win32ss sc-bytes cs-bytes time-taken ]
    { == } forall
} if


% From this point, no further logfile access is needed, as all data is in 
% seventeen internal PostScript arrays per the above names.


%%%%%%%%% Utility II - BUBBLE SORT %%%%%%%%%%%%%%%%%%%%%%%%

% Sorts a defined array by the numeric value of its SECOND field in descending
% order. Speed seems acceptable even without optimization. Minimal extra storage used.

% swapbub exchanges elements bspos and bspos+1 of bsarray.
% The element originally at bspos is also left in bshold.

/swapbub {
    /bshold bsarray bspos get store                % temporary stash for rollover
    bsarray bspos  bsarray bspos 1 add get  put    % move next to previous
    bsarray bspos 1 add  bshold  put               % move previous hold to next
} store


% bubblesort sorts the array on the stack in place, in descending order of the
% numeric SECOND field of each [ (name) count ] element. Entries with equal
% counts keep their relative order. Nothing is left on the stack.
%
% Improvement over the original n-full-passes version (same final order):
%   - each pass sinks the smallest remaining entry to the end, so the upper
%     bound bslast shrinks by one per pass;
%   - sorting stops as soon as a pass makes no swap.
% Empty and one-element arrays fall straight through.
% Uses swapbub for the exchange. Working variables: bsarray bspos bslast bsswapped.

/bubblesort {
    /bsarray exch store
    /bslast bsarray length 2 sub store             % last index that has a "next"

    {
        bslast 0 lt { exit } if                    % nothing left to compare

        /bsswapped false store
        0 1 bslast {
            /bspos exch store
            bsarray bspos get 1 get                % compare present with next
            bsarray bspos 1 add get 1 get
            lt { swapbub /bsswapped true store } if   % swap if next is bigger
        } for

        bsswapped not { exit } if                  % clean pass: already sorted
        /bslast bslast 1 sub store                 % tail entry is now in place
    } loop
} store

%%%%%%%%% (B) FIND AND REPORT PAGE VIEWS %%%%%%%%%%%%%%%%%%%%%%%%

% The assumptions made are that all actual website pages are .asp files specified
% in lower case. On tinaja, most .html files redirect to .asp or are reference files. 

% findandreportpageviews counts the requests whose uri stem contains ".asp"
% (stored as pageviews) and those containing "whtnu.xml" (stored as xmlviews),
% and prints both totals plus the hits-per-page ratio.
%
% Fixes:
%   - the ratio now uses linecount 1 add, the same hit total reported as
%     "Total hits for session" (linecount itself is the zero-based last index);
%   - a log with no .asp hits reports a ratio of 0 instead of stopping on a
%     divide by zero.

/findandreportpageviews {

    0
    cs-uri-stem { (.asp) search { pop pop pop 1 add } { pop } ifelse } forall
    /pageviews exch store

    (Total pageviews for session are ) pageviews 20 string cvs mergestr
    (.\n) mergestr print flush

    (Hits per page viewed are )
    pageviews 0 eq
        { 0 }
        { linecount 1 add pageviews div 100 mul cvi 100 div }   % two decimal places
    ifelse
    20 string cvs mergestr
    (.\n\n) mergestr print flush

    0
    cs-uri-stem { (whtnu.xml) search { pop pop pop 1 add } { pop } ifelse } forall
    /xmlviews exch store

    (Total whtnu.xml views (RSS) for session are ) xmlviews 20 string cvs mergestr
    (.\n\n\n) mergestr print flush

} store

true {findandreportpageviews} if       % switch on or off


%%%%%%%%% (C) FIND AND RANK PAGE POPULARITY %%%%%%%%%%%%%%%%%%%%%%%%

% The assumptions made are that all actual website pages are .asp files specified
% in lower case. 

% create a raw array of .asp hits

% One entry per hit whose uri stem contains ".asp": the stem up to and
% including the first ".asp"; anything after it is dropped.

/rawwebpagehitlist
    [
        cs-uri-stem {
            (.asp) search
            {                                  % post match pre
                3 -1 roll pop                  % match pre     - discard the tail
                exch mergestr                  % pre + match   - assemble filename
            }{
                pop                            % no .asp in this stem
            } ifelse
        } forall
    ]
store

% rawwebpagehitlist ==                        % optional viewing check

% create a one entry unique webpage hit list. It will expand on the fly.
% zero hits gets incremented on first pass.

% Unique webpage hit list. Each element has the form [ (pagename) hitcount ].
% The list is seeded with the first raw hit at a count of zero; the tally pass
% below visits that first hit again and bumps it to one.
% Fix: when the log holds no .asp hits the list now starts empty instead of
% failing on "0 get", and the average further down reports 0 instead of
% dividing by zero.

/uwphlist
    rawwebpagehitlist length 0 gt
        { [ [ rawwebpagehitlist 0 get 0 ] ] }
        { [ ] }
    ifelse
store

% /stuffunique takes one page name from the stack and tallies it in uwphlist:
% a matching entry has its count incremented, otherwise [ name 1 ] is appended
% by rebuilding the list one element longer.

/stuffunique {
    /curpage exch store                          % save the current page url
    /neednewentry true store                     % assume an update is needed

    0 1 uwphlist length 1 sub {                  % scan the existing hit list
        /curpos exch store
        uwphlist curpos get 0 get curpage eq {   % already present?
            uwphlist curpos get
            dup 1 get 1 add 1 exch put           % increment count
            /neednewentry false store            % clear newentry flag
            exit                                 % names are unique, stop scanning
        } if
    } for

    neednewentry {                               % dynamically expand list
        /uwphlist [ uwphlist aload pop [ curpage 1 ] ] store
    } if
} store


/breakcount 60 store                % optional debugger - uncomment if used

rawwebpagehitlist {stuffunique      % expand array as forall loop
                       % /breakcount breakcount 1 sub store 
                       % breakcount 0 eq {exit} if   
                      } forall

% now have to bubble sort highest first...

% uwphlist ==  % optional check
uwphlist bubblesort

(Pages viewed list by popularity:\n\n) print flush

uwphlist {(           ) print ==} forall   % indent list

(\nTotal pages visited at least once = )
uwphlist length  /uwpl exch store uwpl
20 string cvs mergestr (.\n) mergestr print flush


(Total visits checksum = )
0 uwphlist {1 get add} forall /tvc exch store tvc
20 string cvs mergestr (.\n) mergestr print flush

(Average visits per page = )
uwpl 0 eq {0} {tvc uwpl div 100 mul cvi 100 div} ifelse  /avp exch store avp
20 string cvs mergestr (.\n\n) mergestr print flush

%%%%%%%%% (D) FIND AND RANK IMAGE VIEWS %%%%%%%%%%%%%%%%%%%%%%%%

% This feature is especially handy in finding eBay image piracy.
% It reports all access to an /images/bargs/ subdirectory. The report shows
% the actual item name rather than the eBay serial number.

% Most of the returns will in fact be eBay, with our direct bargain pages also
% creating hits. Should eBay only results be wanted, a cross check with eBay in
% the referral array could be separately done. You can also place eBay images ONLY
% in a given subdirectory.

% create a raw array of image hits

% One entry per hit under /images/bargs/: the part of the uri stem that
% follows the first "/images/bargs/".

/imagehitlist
    [
        cs-uri-stem {
            (/images/bargs/) search
            not { pop }                        % not in the subdirectory
                { pop pop }                    % keep only the trailing filename
            ifelse
        } forall
    ]
store

 % imagehitlist ==                        % optional viewing check


%  (\n\n\n\n\n\n) print flush       % temp degub if uncommented


% create a one entry unique image hit list. It will expand on the fly.
% zero hits gets incremented on first pass.

% Unique image hit list. Each element has the form [ (imagename) hitcount ].
% The list is seeded with the first raw hit at a count of zero; the tally pass
% below visits that first hit again and bumps it to one.
% Fix: when the log holds no /images/bargs/ hits the list now starts empty
% instead of failing on "0 get", and the average further down reports 0
% instead of dividing by zero.

/imghlist
    imagehitlist length 0 gt
        { [ [ imagehitlist 0 get 0 ] ] }
        { [ ] }
    ifelse
store


% imghlist ==  (\n\n\n\n\n) print flush    % temp debug if uncommented

% /stuffunique is redefined here to tally image names into imghlist:
% a matching entry has its count incremented, otherwise [ name 1 ] is appended
% by rebuilding the list one element longer.

/stuffunique {
    /curpage exch store                          % save the current image name
    /neednewentry true store                     % assume an update is needed

    0 1 imghlist length 1 sub {                  % scan the existing hit list
        /curpos exch store
        imghlist curpos get 0 get curpage eq {   % already present?
            imghlist curpos get
            dup 1 get 1 add 1 exch put           % increment count
            /neednewentry false store            % clear newentry flag
            exit                                 % names are unique, stop scanning
        } if
    } for

    neednewentry {                               % dynamically expand list
        /imghlist [ imghlist aload pop [ curpage 1 ] ] store
    } if
} store


/breakcount 60 store                % optional debugger - uncomment if used

imagehitlist {stuffunique      % expand array as forall loop
                       % /breakcount breakcount 1 sub store 
                       % breakcount 0 eq {exit} if   
                      } forall


% now have to bubble sort highest first...

% imghlist ==  % optional check
imghlist bubblesort

(Images downloaded list by popularity:\n\n) print flush

imghlist {(           ) print ==} forall   % indent list

(\nTotal images visited at least once = )
imghlist length  /impl exch store impl
20 string cvs mergestr (.\n) mergestr print flush

(Total images checksum = )
0 imghlist {1 get add} forall /tic exch store tic
20 string cvs mergestr (.\n) mergestr print flush

% NOTE: avp is the same variable name the page report stores its average under.
(Average visits per image = )
impl 0 eq {0} {tic impl div 100 mul cvi 100 div} ifelse  /avp exch store avp
20 string cvs mergestr (.\n\n) mergestr print flush


%%%%%%%%% (E) FIND AND REPORT DELIVERED BANNERS %%%%%%%%%%%%%%%%%%%%%%%%

% The assumption made is that all banners are in a /banners/ subdirectory
% Note that previous page revisits use uncounted viewer cached banners.
% Hence the number of Guru banners does not equal 8X the number of page views.

% Count every request whose uri stem contains "/banners/" and report the total.
/bannersdelivered
    0
    cs-uri-stem { (/banners/) search { pop pop pop 1 add } { pop } ifelse } forall
store

(Total new ad banners delivered are ) print
bannersdelivered 20 string cvs print
(.\n\n) print flush




%%%%%%%%% (F) FIND AND RANK FILE DOWNLOADS %%%%%%%%%%%%%%%%%%%%%%%%

% This feature reports on all hits to a /glib/ subdirectory. This subdirectory
% holds most of our tinaja pdf and similar files.

% File download counts may be inflated by PDF byte range retrieval or other factors.

% create a raw array of file hits

% One entry per hit under /glib/: the part of the uri stem that follows the
% first "/glib/".

/filehitlist
    [
        cs-uri-stem {
            (/glib/) search
            not { pop }                        % not in the subdirectory
                { pop pop }                    % keep only the trailing filename
            ifelse
        } forall
    ]
store

%  filehitlist ==                        % optional viewing check
%  (\n\n\n\n\n\n) print flush       % temp degub if uncommented


% create a one entry unique file hit list. It will expand on the fly.
% zero hits gets incremented on first pass.

% Unique file hit list. Each element has the form [ (filename) hitcount ].
% The list is seeded with the first raw hit at a count of zero; the tally pass
% below visits that first hit again and bumps it to one.
% Fixes:
%   - "Total files checksum" now sums fhlist; it used to sum imghlist (the
%     image list), so the file checksum and average were really image numbers;
%   - when the log holds no /glib/ hits the list starts empty instead of
%     failing on "0 get", and the average reports 0 instead of dividing by zero.

/fhlist
    filehitlist length 0 gt
        { [ [ filehitlist 0 get 0 ] ] }
        { [ ] }
    ifelse
store


%  fhlist ==  (\n\n\n\n\n) print flush    % temp debug if uncommented


% /stuffunique is redefined here to tally file names into fhlist:
% a matching entry has its count incremented, otherwise [ name 1 ] is appended
% by rebuilding the list one element longer.

/stuffunique {
    /curpage exch store                          % save the current file url
    /neednewentry true store                     % assume an update is needed

    0 1 fhlist length 1 sub {                    % scan the existing hit list
        /curpos exch store
        fhlist curpos get 0 get curpage eq {     % already present?
            fhlist curpos get
            dup 1 get 1 add 1 exch put           % increment count
            /neednewentry false store            % clear newentry flag
            exit                                 % names are unique, stop scanning
        } if
    } for

    neednewentry {                               % dynamically expand list
        /fhlist [ fhlist aload pop [ curpage 1 ] ] store
    } if
} store


/breakcount 60 store                % optional debugger - uncomment if used

filehitlist {stuffunique      % expand array as forall loop
                       % /breakcount breakcount 1 sub store 
                       % breakcount 0 eq {exit} if   
                      } forall

% now have to bubble sort highest first...

% fhlist ==  % optional check


(Files downloaded list by popularity:\n\n) print flush

fhlist bubblesort

fhlist {(           ) print ==} forall   % indent list

(\nTotal files downloaded at least once = )
fhlist length  /fhl exch store fhl
20 string cvs mergestr (.\n) mergestr print flush

(Total files checksum = )
0 fhlist {1 get add} forall /tfc exch store tfc
20 string cvs mergestr (.\n) mergestr print flush

(Average visits per file = )
fhl 0 eq {0} {tfc fhl div 100 mul cvi 100 div} ifelse  /avf exch store avf
20 string cvs mergestr (.\n\n) mergestr print flush


%%%%%%%%% (G) FIND AND RANK POSTSCRIPT UTILITY DOWNLOADS %%%%%%%%%%%%%%%%%%%%%%%%

% This feature reports on all hits to a /psutils/ subdirectory. This subdirectory
% holds most of our tinaja PostScript utility and similar files.

% create a raw array of file hits

% One entry per hit under /psutils/: the part of the uri stem that follows
% the first "/psutils/".

/pshitlist
    [
        cs-uri-stem {
            (/psutils/) search
            not { pop }                        % not in the subdirectory
                { pop pop }                    % keep only the trailing filename
            ifelse
        } forall
    ]
store

 % pshitlist ==                        % optional viewing check
 % (\n\n\n\n\n\n) print flush       % temp degub if uncommented



% create a one entry unique file hit list. It will expand on the fly.
% zero hits gets incremented on first pass.

% Unique PostScript utility hit list. Each element has the form
% [ (filename) hitcount ]. The list is seeded with the first raw hit at a count
% of zero; the tally pass below visits that first hit again and bumps it to one.
% Fix: when the log holds no /psutils/ hits the list now starts empty instead
% of failing on "0 get", and the average further down reports 0 instead of
% dividing by zero.

/pshlist
    pshitlist length 0 gt
        { [ [ pshitlist 0 get 0 ] ] }
        { [ ] }
    ifelse
store


% pshlist ==  (\n\n\n\n\n) print flush    % temp debug if uncommented


% /stuffuniqueps takes one utility file name from the stack and tallies it in
% pshlist: a matching entry has its count incremented, otherwise [ name 1 ] is
% appended by rebuilding the list one element longer.

/stuffuniqueps {
    /curpage exch store                          % save the current postscript util url
    /neednewentry true store                     % assume an update is needed

    0 1 pshlist length 1 sub {                   % scan the existing hit list
        /curpos exch store
        pshlist curpos get 0 get curpage eq {    % already present?
            pshlist curpos get
            dup 1 get 1 add 1 exch put           % increment count
            /neednewentry false store            % clear newentry flag
            exit                                 % names are unique, stop scanning
        } if
    } for

    neednewentry {                               % dynamically expand list
        /pshlist [ pshlist aload pop [ curpage 1 ] ] store
    } if
} store


/breakcount 60 store                % optional debugger - uncomment if used

pshitlist {stuffuniqueps      % expand array as forall loop
                       % /breakcount breakcount 1 sub store 
                       % breakcount 0 eq {exit} if   
                      } forall

% now have to bubble sort highest first...

%  pshlist ==  % optional check


(PostScript utilties downloaded list by popularity:\n\n) print flush

pshlist bubblesort

pshlist {(           ) print ==} forall   % indent list

(\nTotal PostScript utilities downloaded at least once = )
pshlist length  /pshl exch store pshl
20 string cvs mergestr (.\n) mergestr print flush

(Total PostScript utilities checksum = )
0 pshlist {1 get add} forall /tps exch store tps
20 string cvs mergestr (.\n) mergestr print flush

% NOTE: avf is the same variable name the file report stores its average under.
(Average visits per PostScript Utility = )
pshl 0 eq {0} {tps pshl div 100 mul cvi 100 div} ifelse  /avf exch store avf
20 string cvs mergestr (.\n\n) mergestr print flush


%%%%%%%%% (H) FIND AND RANK GONZO.PS DOWNLOADS %%%%%%%%%%%%%%%%%%%%%%%%

% This is an example of stripping out a particular file or two of interest.



% Pull the gonzo.ps download count out of pshlist, whose entries have the form
% [ (filename) hitcount ]. Sets gonzofound (boolean) and gonzohits (integer).
%
% Fixes:
%   - when gonzo.ps was absent the old code pushed two (0) strings, stored one
%     as gonzohits and then merged the other with a further zero; the report
%     printed "00." without its heading and left strings on the operand stack.
%     gonzohits now simply stays at integer 0;
%   - the heading string began with "\N", which is not a PostScript escape;
%     the backslash is ignored on input, so the printed text is unchanged
%     with it removed;
%   - the throwaway array copies padded with (glotz) and zeros are gone.

/gonzofound false store
/gonzohits 0 store                           % stays zero if gonzo.ps never appears

pshlist {
    dup 0 get (gonzo.ps) eq
        { 1 get /gonzohits exch store        % hit count of the matching entry
          /gonzofound true store }
        { pop }
    ifelse
} forall

(Number of GONZO.PS downloads is )
gonzohits 20 string cvs mergestr (.\n\n) mergestr print flush


%%%%%%%%% (I) FIND AND RANK 404 FILE NOT FOUND ERRORS %%%%%%%%%%%%%%%%%%%%%%%%

% This feature finds all the 404 file not found errors and ranks them.
% Similar procs can be applied to other error codes.

% This differs from file reporters in that the 404 array position needs 
% discovered so that the actual error request can be separately read.

% Note that your 404 error rate may be grossly inflated by piracy attempts.
% Be sure to separate the inside correctable errors from the outside ones.

% Scan sc-status for 404 errors...

% One entry per log line whose sc-status is (404): the uri stem requested on
% that same line. scposn is the line index being examined.

/e404list
    [
        0 1 sc-status length 1 sub {               % look for 404 errors
            dup /scposn exch store
            sc-status exch get (404) eq
                { cs-uri-stem scposn get } if      % find 404 requestor
        } for
    ]
store


% Unique 404 hit list. Each element has the form [ (requested stem) hitcount ].
% The list is seeded with the first raw hit at a count of zero; the tally pass
% below visits that first hit again and bumps it to one.
% Fixes:
%   - a log with no 404 errors now gives an empty list instead of failing on
%     "0 get";
%   - the percentage is taken against linecount 1 add, the same hit total
%     reported as "Total hits for session" (linecount itself is the zero-based
%     last index), and reports 0 when there are no hits at all.

/e404hl
    e404list length 0 gt
        { [ [ e404list 0 get 0 ] ] }
        { [ ] }
    ifelse
store

% e404hl ==  (\n\n\n\n\n) print flush    % temp debug if uncommented


% /stuffunique404 takes one requested stem from the stack and tallies it in
% e404hl: a matching entry has its count incremented, otherwise [ name 1 ] is
% appended by rebuilding the list one element longer.

/stuffunique404 {
    /curpage exch store                          % save the current requested stem
    /neednewentry true store                     % assume an update is needed

    0 1 e404hl length 1 sub {                    % scan the existing hit list
        /curpos exch store
        e404hl curpos get 0 get curpage eq {     % already present?
            e404hl curpos get
            dup 1 get 1 add 1 exch put           % increment count
            /neednewentry false store            % clear newentry flag
            exit                                 % names are unique, stop scanning
        } if
    } for

    neednewentry {                               % dynamically expand list
        /e404hl [ e404hl aload pop [ curpage 1 ] ] store
    } if
} store


e404list {stuffunique404      % expand array as forall loop
                       
                      } forall


%  e404hl ==  % optional check


(File Not Found 404 errors by severity:\n\n) print flush

e404hl bubblesort

e404hl {(           ) print ==} forall   % indent list


(\nTotal unique 404 file not found errors = )
e404hl length  /ee404 exch store ee404
20 string cvs mergestr (.\n) mergestr print flush

(Total 404 errors checksum = )
0 e404hl {1 get add} forall /t4c exch store t4c
20 string cvs mergestr (.\n) mergestr print flush

(The 404 file not found errors as a percentage of total hits = )
linecount 1 add 0 eq
    { 0 }
    { t4c linecount 1 add div 100 mul 10000 mul cvi 10000 div }   % four decimal places
ifelse  /er404 exch store er404
20 string cvs mergestr (%.\n\n) mergestr print flush


%%%%%%%%% (J) EBAY IMAGE THEFT DETECTOR & ACTIVITY REPORTER %%%%%%%%%%%%%%%%%%


% Revised version for eBay log file change of 1 August 2005.


% This can be used to measure the actual viewings of your current eBay
% offerings and their popularity. Or will quickly show you if anyone else
% is attempting to steal your images.

% initial format is http://cgi.ebay.com/Refrigerator-freezer-ener
% gy-saver-10-amp-plugin_W0QQitemZ5982681673QQcategoryZ3188QQtcZ
% photoQQrdZ1QQcmdZViewItem 200 0 0 284506 389 90782

% we id on http://www.dbi.ebay.com/ and then search for hopefully unique "-/",   % this has changed   some remain missed.
% extracting the name only. Initially producing this format...

% [(Refrigerator-freezer-ener...) 15 ] 

% Section headings. The second heading must be printed before ebay2image runs,
% because grabnewlogonly echoes every referral it cannot name with ==.
(eBay image requests by popularity:\n\n) print flush

(Raw closed item hit list :\n\n) print flush


% Referrals are recognized by the presence of this string in cs-referer.
/targetstr (http://cgi.ebay.com/) store            % new eBay log referral prefix

% Count of eBay referrals that held no "-/" marker (bumped by grabnewlogonly).
/oldebayhit 0 store


% grabnewlogonly takes the part of an eBay referral that follows targetstr.
% If it contains "-/", the text ahead of the first "-/" is left on the stack
% as the item name. Otherwise the string is echoed with ==, oldebayhit is
% bumped, and nothing is left on the stack.

/grabnewlogonly {
    /curlstr exch store                        % string being examined

    curlstr (-/) search
    {                                          % post match pre
        3 1 roll pop pop                       % pre   - the item name
    }{
        ==                                     % show the unnamed referral
        /oldebayhit oldebayhit 1 add store
    } ifelse
} store





% ebay2image scans cs-referer for entries containing targetstr, passes the text
% after targetstr to grabnewlogonly, and gathers whatever item names that leaves
% on the stack into the array e2i. scposn is the line index being examined.

/ebay2image {
    /e2i
    [
        0 1 cs-referer length 1 sub {              % look for eBay referral
            /scposn exch store
            cs-referer scposn get targetstr search
            not { pop }                            % not an eBay referral
                { pop pop grabnewlogonly }         % keep only what follows the prefix
            ifelse
        } for
    ]
    store
} store


ebay2image

%% (\n\n\n\ne21 report follows \n) print flush

%%%  e2i ==       %%%%%%%%%%%%%%%%%%%%%%%%%%%%%  ok to here 





/e2ihl [ [e2i  0 get 0 ] ]   store   % define ebay image log hit array










% stuffuniqueebay tallies one eBay image name into the unsorted e2ihl list.
% e2ihl has the form [ [(imagedata) hitcount] [(imagedata) hitcount] ... ]
%
%   (imagedata) stuffuniqueebay -
%
% If the name is already listed, its hitcount is incremented in place.
% Otherwise e2ihl is rebuilt one entry longer with [ (imagedata) 1 ] appended.
% Works on an empty e2ihl as well (the scan is skipped and the entry appended).

/stuffuniqueebay {
    /curpage exch store                       % save the current eBay image data string
    /neednewentry true store                  % assume a new entry is needed

    0 1 e2ihl length 1 sub {                  % scan the existing hit list
        /curpos exch store
        e2ihl curpos get 0 get curpage eq {   % already present? (eq compares string contents)
            e2ihl curpos get
            dup 1 get 1 add 1 exch put        % increment count in place
            /neednewentry false store         % clear newentry flag
            exit                              % names are unique: no need to scan the rest
        } if
    } for

    neednewentry {
        mark e2ihl aload pop                  % if new needed, put old entries on stack
        [ curpage 1 ] ]                       % and dynamically expand list
        /e2ihl exch store
    } if
} store





% Tally every extracted name into e2ihl, one stuffuniqueebay call per entry.
e2i { stuffuniqueebay } forall

(\n\n\neBay auction and store hits (multiple photos will bump counts) :\n\n) print flush

% Hand the list to bubblesort, then print one indented entry per line.
e2ihl bubblesort
e2ihl { (           ) print == } forall









% eir   = number of entries in e2ihl
/eir e2ihl length store
(\nTotal eBay item requests = )
eir 20 string cvs mergestr (.\n) mergestr print flush

% teir  = sum of the hit counts over all entries
/teir 0 e2ihl { 1 get add } forall store
(Total eBay images requested = )
teir 20 string cvs mergestr (.\n) mergestr print flush

% airpi = teir / eir, truncated to two decimals
/airpi teir eir div 100 mul cvi 100 div store
(Average eBay image requests per item = )
airpi 20 string cvs mergestr (.\n) mergestr print flush

% hits for which grabnewlogonly found no (-/) delimiter
(Closed eBay items not named = )
oldebayhit 20 string cvs mergestr
(.\n\n) mergestr print flush




%%%%%%%%% (K) FILTER AND RANK USEFUL REFERRALS %%%%%%%%%%%%%%%%%%%%%%%%

% The cs-referer log entry often tells you where a visitor just came from.
% Useful referrals can tell you which sites have links to yours and when you
% have gotten slashdotted or involved in a robot pissing contest.

% Not all log entries have referrals, and the majority of the remaining often
% will come from your own site. Further, most entries with a "?" in them will
% probably be a search query that you may want to separately split out.

% Thus, heavy filtering of referrals is often desirable.


% Referral excludes: filterreferrals drops any referral that contains one of
% these substrings.
/refexcludes [
    (tinaja) (TINAJA) (Tinaja) (24.120.195.24)
    (ebay) (eBay) (?) (XXXX) (++++)
] store

(Filtered referrals by popularity:\n\n) print flush

% create a raw filtered array of referral hits...


% filterreferrals: (referral) filterreferrals (referral) | -
% Leaves the referral on the stack only when it is at least five characters
% long and contains none of the refexcludes substrings; otherwise leaves
% nothing.
/filterreferrals {
    /rawref exch store

    rawref length 5 ge {                          % ignore short strings
        false                                     % running "some exclude matched" flag
        refexcludes {
            rawref exch search
              { pop pop pop true }
              { pop false }
            ifelse
            or
        } forall
        not { rawref } if
    } if
} store


% Raw filtered referral list: every cs-referer entry that survives
% filterreferrals, in log order.
/freflist [ cs-referer { filterreferrals } forall ] store

% freflist ==  (\n) print flush            % uncomment for debug

% Create a one-entry unique hit list. It will expand on the fly; the zero
% count is incremented on the first tally pass.
% NOTE(review): "freflist 0 get" requires at least one surviving referral.
/refhlist  freflist 0 get 0 2 array astore  1 array astore  store

% refhlist ==  (\n\n\n\n\n) print flush    % temp debug if uncommented

% refhlist ==  (\n\n\n\n\n) print flush    % temp debug if uncommented


% stuffuniqueref tallies one referral string into the unsorted refhlist.
% refhlist has the form [ [(referral) hitcount] [(nextreferral) hitcount] ... ]
%
%   (referral) stuffuniqueref -
%
% If the referral is already listed, its hitcount is incremented in place.
% Otherwise refhlist is rebuilt one entry longer with [ (referral) 1 ] appended.
% Works on an empty refhlist as well (the scan is skipped and the entry appended).

/stuffuniqueref {
    /curpage exch store                         % save the current referral string
    /neednewentry true store                    % assume a new entry is needed

    0 1 refhlist length 1 sub {                 % scan the existing hit list
        /curpos exch store
        refhlist curpos get 0 get curpage eq {  % already present? (eq compares string contents)
            refhlist curpos get
            dup 1 get 1 add 1 exch put          % increment count in place
            /neednewentry false store           % clear newentry flag
            exit                                % entries are unique: stop scanning
        } if
    } for

    neednewentry {
        mark refhlist aload pop                 % if new needed, put old entries on stack
        [ curpage 1 ] ]                         % and dynamically expand list
        /refhlist exch store
    } if
} store


/breakcount 60 store                % optional debugger - uncomment if used

% Tally every filtered referral into refhlist.
freflist {
    stuffuniqueref
    % /breakcount breakcount 1 sub store       % debug: stop after breakcount entries
    % breakcount 0 eq {exit} if
} forall

% refhlist ==    % optional check

% Now bubble sort (highest first is the intent), then list one indented
% entry per line.
refhlist bubblesort
refhlist { (           ) print == } forall

% tuur = number of distinct referrals (entries in refhlist)
/tuur refhlist length store
(\nTotal unique and useful Referrals = )
tuur 20 string cvs mergestr (.\n) mergestr print flush

% tur  = sum of the hit counts over all entries
/tur 0 refhlist { 1 get add } forall store
(Total useful Referrals = )
tur 20 string cvs mergestr (.\n) mergestr print flush

% avf  = tur / tuur, truncated to two decimals
/avf tur tuur div 100 mul cvi 100 div store
(Average visits per Referral = )
avf 20 string cvs mergestr (.\n\n) mergestr print flush




%%%%%%%%% (L) FILTER AND RANK SEARCH QUERIES %%%%%%%%%%%%%%%%%%%%%%%%

% A referral that has a "?" in it is often a search engine query rather than
% a visitor site. This utility ranks and reports on search queries.

% This module can be expanded to rank the search engines used or to try and
% split out the actual search queries. A sort priority bug also still exists.

(Filtered search queries by popularity:\n\n) print flush

% When true, filtersearchengines rejects any referral containing (ebay).
/omiteBay true store     % omit any hits from eBay?

% create a raw filtered array of search engine hits...

% filtersearchengines: (referral) filtersearchengines (referral) | -
% Leaves the referral on the stack only when it contains a (?) and, if
% omiteBay is true, does not contain (ebay). Otherwise leaves nothing.
/filtersearchengines {
    /rawref exch store

    % first flag: false only when eBay is being omitted and this is an eBay hit
    omiteBay
      { rawref (ebay) search { pop pop pop false } { pop true } ifelse }
      { true }
    ifelse

    % second flag: true when the referral carries a query
    rawref (?) search { pop pop pop true } { pop false } ifelse

    and { rawref } if
} store

% Raw filtered search list: every cs-referer entry that survives
% filtersearchengines, in log order.
/fseflist [ cs-referer { filtersearchengines } forall ] store

% fseflist ==  (\n) print flush            % uncomment for debug

% Create a one-entry unique hit list. It will expand on the fly; the zero
% count is incremented on the first tally pass.
% NOTE(review): "fseflist 0 get" requires at least one surviving query.
/fsehlist  fseflist 0 get 0 2 array astore  1 array astore  store

% fsehlist ==  (\n\n\n\n\n) print flush    % temp debug if uncommented


% stuffuniquesrc tallies one search-query referral into the unsorted fsehlist.
% fsehlist has the form [ [(query) hitcount] [(nextquery) hitcount] ... ]
%
%   (query) stuffuniquesrc -
%
% If the query is already listed, its hitcount is incremented in place.
% Otherwise fsehlist is rebuilt one entry longer with [ (query) 1 ] appended.
% Works on an empty fsehlist as well (the scan is skipped and the entry appended).

/stuffuniquesrc {
    /curpage exch store                         % save the current query referral
    /neednewentry true store                    % assume a new entry is needed

    0 1 fsehlist length 1 sub {                 % scan the existing hit list
        /curpos exch store
        fsehlist curpos get 0 get curpage eq {  % already present? (eq compares string contents)
            fsehlist curpos get
            dup 1 get 1 add 1 exch put          % increment count in place
            /neednewentry false store           % clear newentry flag
            exit                                % entries are unique: stop scanning
        } if
    } for

    neednewentry {
        mark fsehlist aload pop                 % if new needed, put old entries on stack
        [ curpage 1 ] ]                         % and dynamically expand list
        /fsehlist exch store
    } if
} store


/breakcount 60 store                % optional debugger - uncomment if used

% Tally every filtered search referral into fsehlist.
fseflist {
    stuffuniquesrc
    % /breakcount breakcount 1 sub store       % debug: stop after breakcount entries
    % breakcount 0 eq {exit} if
} forall

% fsehlist ==    % optional check

% Now bubble sort (highest first is the intent), then list one indented
% entry per line.
fsehlist bubblesort
fsehlist { (           ) print == } forall



% Search query totals.
%
% fsehlist holds one [ (query) count ] entry per DISTINCT query, so its
% length is the number of unique queries and the sum of the counts is the
% total number of queries. The two printed labels used to be swapped; they
% now match the values they introduce.
%
% The stored names are left exactly as they were so that anything reading
% them later keeps working:
%   tsq  = fsehlist length        (unique queries)
%   tusq = sum of the hit counts  (all queries)
%   avf  = tusq / tsq, truncated to two decimals
% NOTE: avf is the same name the referral report stores its average in, so
% this overwrites that value.

(\nTotal unique search queries = )
fsehlist length  /tsq exch store tsq
20 string cvs mergestr (.\n) mergestr print flush

(Total search queries = )
0 fsehlist {1 get add} forall /tusq exch store tusq
20 string cvs mergestr (.\n) mergestr print flush

(Average search query repeats = )
tusq tsq div 100 mul cvi 100 div  /avf exch store avf
20 string cvs mergestr (.\n\n) mergestr print flush

%%%%%%%%% (M) EXTRACT USER ACTIVITY LOGS AND REPORT %%%%%%%%%%%%%%%%%%%%%%%%

% Typical visitor activity is interspersed with that of other users, but still
% occurs in time sequence. A userlog array can first be created of form...

%  [
%  [(useraurl) hitcount [hit linecount positions 13 17 54 ... etc ] ]
%  [(userburl) hitcount [hit linecount positions 14 23 35 ... etc ] ]
%  ]

% From that activity log, a filtered (eliminating GIF's, banners, etc..) script
% of the visitor page views and file downloads can be created. Visit time and
% page flow and exit page can also be extracted. 

% First create the unsorted array without linecounts. We will try a dictionary method
% here to allow use of the ~known~ operator...


% Dictionary keyed by visitor (as a name); each value is the array of hit
% positions recorded for that visitor.
/visitorrawdict 1 dict store

stopwatchon

(Creating a visitor log array...\n\n) print flush




% addnewurl: - addnewurl -
% Stores a fresh one-element array holding vposn under the name made from cvurl.
/addnewurl { cvurl cvn  vposn 1 array astore  store } store

% bumplist: - bumplist -
% Replaces the array stored under the name made from cvurl with a copy that
% is one element longer, vposn being the new last element.
/bumplist { cvurl cvn dup load  [ exch aload pop vposn ]  store } store



% stuffvdict: (visitor) stuffvdict -
% Files the current hit position (vposn) under this visitor in visitorrawdict:
% extends the visitor's list via bumplist if the visitor is already known,
% otherwise starts a new list via addnewurl.
/stuffvdict {
    /cvurl exch store
    visitorrawdict begin
        currentdict cvurl cvn known
          { bumplist }
          { addnewurl }
        ifelse
    end
} store

stopwatchon

% WE HAD A MAJOR NONLINEAR SPEED PROBLEM HERE BEYOND 10,000 HITS.
% Selective VM reclamation appears to have minimized the problem.

-2 vmreclaim                              % turn reclamation off

% File every logged hit under its visitor, one c-ip entry at a time.
0 1 c-ip length 1 sub {
    /vposn exch store

    vposn 1 add 2500 mod 0 eq {           % progress note every 2500 hits
        (Processed ) vposn 1 add 20 string cvs mergestr
        ( visitor hits.\n) mergestr print flush

        1 vmreclaim -2 vmreclaim          % reclaim once, then shut down
    } if

    c-ip vposn get stuffvdict             % this does it
} for

1 vmreclaim                               % reclaim once
0 vmreclaim                               % return to normal

stopwatchoff

(\n\n\n\n\n) print flush

% visitorrawdict {== ==  (\n) print flush } forall (\n\n\n) print flush % uncomment to debug


% Then convert the dictionary to an array and add the hit counts...
%
% Each visitorrawdict entry  /visitor [ positions ]  becomes one record
%     [ (visitor) hitcount [ positions ] ]
% where hitcount is the length of the positions array.
%
% The visitor name is converted into a string sized to the name itself, so
% visitor keys longer than 20 characters no longer raise a rangecheck.

/visitorrawarray

mark

visitorrawdict {
    exch dup length string cvs        % key name -> (visitor), exact fit
    exch dup length exch              % (visitor) hitcount [ positions ]
    mark 4 1 roll ]                   % wrap the three items as one record
} forall

] store



% visitorrawarray {== } forall    % uncomment to analyze


% Report how many visitor records were built.
(\n\nNumber of unique visitors is approximately )
visitorrawarray length 20 string cvs mergestr
(.\n\n) mergestr print flush

% Then bubble sort by visitor hit count...
visitorrawarray bubblesort   % NOT SORTING!!!



% visitorrawarray {== } forall       % uncomment to view

% Then report the individual visitor logs

% /timesubtractor looks up the (HH:MM:SS) strings of the first and last hits
% recorded for visitor number posn, converts both to seconds, subtracts them,
% and leaves the difference on the stack as a "M minutes and S seconds"
% string. It assumes the first and last hits in any 24 hour period represent
% the visitor time.

% Note that a very high visit time combined with low activity probably means
% more than one session.

% hmstoseconds: (HH:MM:SS) hmstoseconds int
% Reads the digit pairs at offsets 0, 3 and 6 and folds them into seconds.
/hmstoseconds {
    /hmsstr exch store
    0                                          % running total
    [ 0 3 6 ] {
        exch 60 mul exch                       % promote total to the next unit
        dup hmsstr exch get 48 sub 10 mul      % tens digit
        exch 1 add hmsstr exch get 48 sub      % units digit
        add add
    } forall
} store

/timesubtractor {
    visitorrawarray posn get 2 get             % this visitor's hit positions
    dup 0 get /earliest exch store
    dup length 1 sub get /latest exch store

    /earlytime  timearray earliest get store
    /latesttime timearray latest get store

    /earlyseconds earlytime  hmstoseconds store
    /lateseconds  latesttime hmstoseconds store

    lateseconds earlyseconds sub cvi
    dup 0 lt { 24 3600 mul add } if            % GMT wraparound fix?
    /deltaseconds exch store

    deltaseconds 60 idiv 20 string cvs ( minutes and ) mergestr
    deltaseconds 60 mod  20 string cvs mergestr
    ( seconds) mergestr
} store

% processhit filters and positions the files. At present, all immediately
% previously viewed files are suppressed, assuming they are a .PDF byte
% range retrieval or a web retransmit. Banners, GIFs, newsum wallpaper,
% .ico icons, and barro buttons are also unreported.

% Short .asp and .htm files appear in the left column and longer ones in
% the right. This attempts to give a left list of page views and a right
% list of downloaded files or secondary pages.

/prevhold (         ) store           % save previously viewed page

% processhit: (uri-stem) processhit -
% Prints one visitor log line for the hit, or nothing when it is filtered.
%   - hits containing (/banner) (.gif) (/barro) (newsum) or (.ico) are skipped
%   - a hit identical to the previously printed one (prevhold) is skipped
%   - .asp / .htm names of 15 characters or fewer print in the left column;
%     everything else prints further right
% prevhold is then updated with a private copy of the hit. The copy is sized
% to the hit itself, so stems longer than 300 characters no longer raise a
% rangecheck.

/processhit {
    /currenthit exch store

    % true only if none of the unwanted substrings is present
    currenthit (/banner) search { pop pop pop false } { pop true } ifelse
    currenthit (.gif)    search { pop pop pop false } { pop true } ifelse and
    currenthit (/barro)  search { pop pop pop false } { pop true } ifelse and
    currenthit (newsum)  search { pop pop pop false } { pop true } ifelse and
    currenthit (.ico)    search { pop pop pop false } { pop true } ifelse and

    prevhold currenthit ne and {              % and not a repeat of the last line
        (      ) print

        % right column unless this is a short .asp or short .htm name
        currenthit (.asp) search { pop pop pop currenthit length 15 gt }
                                 { pop true } ifelse
        currenthit (.htm) search { pop pop pop currenthit length 15 gt }
                                 { pop true } ifelse
        and
          { (                         ) print currenthit == }
          { currenthit == }
        ifelse

        /prevhold currenthit dup length string copy store
    } if
} store

% One report per visitor record: a heading with the visit duration, then
% every hit of that visitor run through processhit.
0 1 visitorrawarray length 1 sub {
    /posn exch store

    (\nUser ) visitorrawarray posn get 0 get mergestr
    ( took ) mergestr timesubtractor mergestr
    ( to visit...\n\n) mergestr print flush

    /curmat visitorrawarray posn get 2 get store   % this visitor's hit positions
    curmat { cs-uri-stem exch get processhit } forall

    (\n) print flush
} for

% tuv = number of visitor records
/tuv visitorrawarray length store
(\nTotal unique visitors = )
tuv 20 string cvs mergestr (.\n) mergestr print flush






%%%%%%%%%%%%% FURTHER ASSISTANCE %%%%%%%%%%%%%%

% ...and custom development is available via http://www.tinaja.com/info01.asp, by
% emailing don@tinaja.com, or by calling (928) 428-4073.





%%%%%%%%%%%%% NEW STUFF %%%%%%%%%%%%%%

% need caution note on GMT negatives

%%%%%%%%%%%% MORE ACCURATE SPECIFIC HITS %%%%%%%%%%

% this does not work as it does not remove doubles....

% Count, across every visitor's hit list, the hits whose cs-uri-stem
% contains targetfile. Repeated hits by one visitor are all counted.
/targetfile (nalogeb.pdf) store
/sfhitcount 0 store

visitorrawarray {
    2 get {                                        % each hit position of this visitor
        cs-uri-stem exch get targetfile search
          { pop pop pop /sfhitcount sfhitcount 1 add store }
          { pop }
        ifelse
    } forall
} forall

(\n\nFile ANALOGEB.PDF saw ) sfhitcount 20 string cvs mergestr
( isolated downloads.\n\n\n) mergestr print flush



%%%%%%%%%%%%% RAW GOOGLE SEARCHES %%%%%%%%%%%%%%

% swapbub: - swapbub -
% Exchanges elements bspos and bspos+1 of bsarray in place, using bshold as
% the temporary.
/swapbub {
    /bshold bsarray bspos get store                % stash the earlier element
    bsarray bspos  bsarray bspos 1 add get  put    % move next to previous
    bsarray bspos 1 add  bshold  put               % move stashed element to next
} store


% ucadj: charcode ucadj charcode'     (not in use)
% Maps the uppercase ASCII letters A-Z (65-90) to lowercase by adding 32 and
% leaves every other code unchanged. The upper bound was 91, which also
% turned "[" into "{".
/ucadj { dup dup 65 ge exch 90 le and { 32 add } if } store



% alphabubblesort (first version): array alphabubblesort -
% Stores the array as bsarray and bubble sorts it in place using gt and
% swapbub, making one full pass per element.
% NOTE: the name alphabubblesort is redefined immediately below, so this
% version is not the one that ends up being called.
/alphabubblesort {   /bsarray exch store  

    bsarray length {                               % n trips max - can shorten later

        0 1 bsarray length 2 sub {/bspos exch store  % for all but last entry
        bsarray bspos get                     % compare present with next
        bsarray bspos 1 add get 
                    gt {swapbub} if } for            % swap if present is greater than next

                    } repeat  } store               % repeat till done




% alphabubblesort: array alphabubblesort sortedarray
% Stack-based sort into ascending order as judged by gt, bracketed by
% stopwatchon / stopwatchoff.
%
% The elements are unloaded onto the operand stack above a mark. Each pass
% works on a window of the top idx elements: the top two are compared, the
% larger is rolled to the bottom of the window, and after the pass the
% smallest of the window is rolled to the bottom. The window then shrinks by
% one. A new array is built from the stack at the end.
%
% Arrays with fewer than two elements are returned unchanged. Previously an
% empty array ended in a rangecheck (negative roll count) and a one-element
% array in a typecheck (gt against the mark).

/alphabubblesort {
    stopwatchon

    dup length 2 lt
      { }                                          % nothing to sort
      {
        mark exch aload pop
        counttomark /idx exch store                % window = all elements

        {
            0 1 idx 1 sub {
                pop                                % loop index is not needed
                2 copy gt { exch } if              % larger of the top two on top
                idx 1 roll                         % and send it to the window bottom
            } for
            idx 1 roll                             % park the window's smallest

            /idx idx 1 sub store                   % shrink the window
            idx 0 eq { exit } if
        } loop

        ]
      }
    ifelse

    stopwatchoff
} store









% improvefilt: (rawquery) improvefilt (cleanquery) | -
% Cleans up one raw query string taken from a referral.
%
%   pass 1  walks at most 200 characters, turning each %xx escape and each
%           "+" into a space, dropping a leading space, and collapsing
%           repeated spaces. The (~~~) padding lets the %xx skip run past the
%           end of the real text without a rangecheck.
%   pass 2  drops one remaining leading space.
%   pass 3  capitalises the first character if it is a lowercase letter.
%           The test is a-z (97-122); it used to run up to 126 and so also
%           shifted { | } ~ .
%
% Results longer than five characters are converted with makestring and left
% on the stack; shorter ones are discarded. The == calls are debug echoes of
% the padded input, the cleaned code array, and the final string.

/improvefilt {
    (~~~) mergestr /fstr exch store
    fstr ==                                        % debug echo of padded input

    /fsptr 0 store

    % ---- pass 1: unescape, crop and collapse spaces ----
    mark
    /leading true store

    200 {
        fstr fsptr get

        dup 37 eq { pop 32 /fsptr fsptr 2 add store } if   % strip %xx punct
        dup 43 eq { pop 32 } if                            % "+" becomes a space
        dup 32 eq leading and { pop } if                   % crop leading space

        counttomark 2 ge {
            2 copy 32 eq exch 32 eq and { pop } if         % crop repeat spaces
        } if

        /leading false store
        /fsptr fsptr 1 add store
        fsptr fstr length 3 sub ge { exit } if             % stop at the padding
    } repeat

    ] /tempstor exch store

    % ---- pass 2: must strip additional leading space FIRST, then convert ----
    /leading true store
    mark
    tempstor {
        dup 32 eq leading and { pop } if
        /leading false store
    } forall
    ]
    /tempstor1 exch store

    % ---- pass 3: capitalise a lowercase first letter ----
    /leading true store
    mark
    tempstor1 {
        dup dup 97 ge exch 123 lt and leading and { 32 sub } if
        /leading false store
    } forall
    ]

    dup ==                                         % debug echo of character codes

    dup length 5 gt
      { makestring dup == }                        % keep, and echo the string
      { pop }                                      % too short to be useful
    ifelse
} store



% filtgoogle: post match pre filtgoogle (cleanquery) | -
% Called with the three results of a successful (google) search. Discards
% the match and the text before it, then looks for (q=) in the remainder.
% The text after (q=), cut at the first (&) if there is one, is handed to
% improvefilt. With no (q=) present nothing is left on the stack.
/filtgoogle {
    pop pop                                        % keep only the text after (google)
    (q=) search                                    % query delimiter
      {
        pop pop                                    % keep only the text after (q=)
        (&) search { 3 1 roll pop pop } if         % cut at the next parameter
        improvefilt
      }
      { pop }
    ifelse
} store


% grabgoogle: [ (referral) count ] grabgoogle (cleanquery) | -
% Passes the referral of one fsehlist entry to filtgoogle if it contains
% (google); otherwise leaves nothing.
/grabgoogle { 0 get (google) search { filtgoogle } { pop } ifelse } store

% Gather whatever grabgoogle leaves for each fsehlist entry.
/sordidlist [ fsehlist { grabgoogle } forall ] store

sordidlist ==

% Replace the list with its alphabubblesort result, then print it one
% entry per line.
/sordidlist sordidlist alphabubblesort store

sordidlist { == } forall




% Monthly add-on hook: when addtomonth is true, execute the file named by
% dayprocfilename.
addtomonth { dayprocfilename run } if  % update monthly stats if active




% EOF