/* HTML-FREQ.C - MAIN PROGRAM FOR GATHERING STATISTICS ON HTML FILES. */

#include <stdio.h>
#include <stdlib.h>
#include "code.h"
#include "freq.h"
#include "html.h"

/* MAIN PROGRAM.  Reads each HTML file named on the command line, updating 
   the frequency table for whatever context is current as each character is 
   read, and finally writes every frequency table to the statistics file.

   Exits with status 1 on a usage error, or if the statistics file can't be
   opened or written.  An HTML file that can't be opened is reported and
   skipped.  Returns 0 otherwise. */

int main 
( int argc,
  char **argv
)
{   
  html_options options;		/* Options specifying how compression is done */
  html_context context;		/* Information on the current context */
  frequencies *freq;		/* Frequencies for current context */

  const char *stats_name;	/* Name of statistics file, kept for messages */

  int ch; 			/* Next character to encode */
  int index;			/* Index of character to encode */

  FILE *hf;			/* HTML file */
  FILE *sf;			/* Statistics file */

  int i;

  (void) argc;			/* Arguments are scanned via argv only */

  /* Process command line options, exiting if there's an error.  After this
     call, argv is expected to point at the first HTML file name (presumably
     html_arguments advances it past the options and the statistics file 
     name - confirm against html.c). */

  if (html_arguments(&argv,&options)!=0 || options.stats_file==0 || *argv==0)
  { fprintf (stderr,
     "Usage: html-freq [ -h | -H ] [ -p ] stats-file HTML-file ...\n");
    exit(1);
  }

  /* Remember the statistics file name, since the copy in the options is
     cleared below but the name is still needed for error messages. */

  stats_name = options.stats_file;

  /* Open statistics file for writing. */

  sf = fopen(stats_name,"wb");
  if (sf==NULL)
  { fprintf(stderr,"Can't write statistics file (%s)\n",stats_name);
    exit(1);
  }

  /* Initialize the contexts - after disabling use of the statistics file. */

  options.stats_file = 0;
  html_initial_context(&options,&context);

  /* Read each HTML file. */

  for ( ; *argv!=0; argv++)
  {
    /* Open the next HTML file. */

    hf = fopen(*argv,"r");
    if (hf==NULL)
    { fprintf(stderr,"Can't read HTML file %s - skipping it\n",*argv);
      continue;
    }

    /* Process each character in turn, switching contexts as necessary. */
  
    for (;;)
    { 
      /* Read the next character to encode; terminate loop on EOF. */
  
      ch = getc(hf);
      if (ch==EOF) break;
  
      /* Update frequencies for current context based on the character read. */
  
      freq = html_find_table(&options,&context);
      index = freq->symbol_to_index[ch];
      update_frequencies(freq,index);
  
      /* Move to the new context, based on the character read. */
  
      html_next_context(ch,&options,&context);
    }

    /* getc returns EOF for read errors as well as for end of file, so say
       so if the file was cut short.  Processing continues regardless. */

    if (ferror(hf))
    { fprintf(stderr,"Error reading HTML file %s - it may be incomplete\n",
                     *argv);
    }
  
    /* Process the EOF symbol. */
  
    freq = html_find_table(&options,&context);
    index = freq->symbol_to_index[EOF_symbol];
    update_frequencies(freq,index);

    fclose(hf);
  }

  /* Write the frequencies to the statistics file, one table at a time, as
     raw in-memory structures. */

  for (i = 0; i<context.n_freq; i++)
  { if (fwrite(&context.freq[i],sizeof(frequencies),1,sf)!=1)
    { fprintf(stderr,"Error writing statistics file (%s)\n",stats_name);
      exit(1);
    }
  }

  /* Close the statistics file, checking the result, since buffered data
     may only actually be written at this point. */

  if (fclose(sf)!=0)
  { fprintf(stderr,"Error writing statistics file (%s)\n",stats_name);
    exit(1);
  }

  return 0;
}

