@mastersthesis{Corlett,
  author = "Eric Corlett",
  title = "{An Exact A* Method for Solving Letter Substitution Ciphers}",
  year = "2011",
  school = "Department of Computer Science, University of Toronto",
  abstract = "<p>Letter-substitution ciphers encode a document from a
                  known or hypothesized language into an unknown
                  writing system or an unknown encoding of a known
                  writing system. The process of solving these ciphers
                  is a problem that can occur in a number of practical
                  applications, such as in the problem of determining
                  the encodings of electronic documents in which the
                  language is known, but the encoding standard is
                  not. It has also been used in OCR applications. <P> In
                  this paper, we introduce a novel method for
                  deciphering letter-substitution ciphers. We do this
                  by formulating a variant of the Viterbi algorithm
                  for use as an A* heuristic over partial solutions of
                  a given cipher. This heuristic can then be used as a
                  guide in an A* search for the correct solution. We
                  give an overview of the classical Viterbi and A*
                  search algorithms, go on to describe of our proposed
                  algorithm, and prove its correctness. <P> We then test
                  our algorithm on a selection of ciphers formed from
                  Wikipedia articles, and show that our algorithm has
                  the potential to be a viable, practical method for
                  efficiently solving decipherment problems. We also
                  find, however, that it does have a number of
                  shortcomings, most notably a high variation in
                  running time between similar ciphers. In response to
                  this, we describe potential sources of information
                  to offset this variability and use this information
                  to improve our original algorithm. <P> We test this
                  improved algorithm on both the original ciphers and
                  a selection of newly collected ciphers and find an
                  average improvement in time and an across-the-board
                  improvement in variability. We conclude that we have
                  successfully addressed the issue of high variability
                  found in our original algorithm. The improved
                  algorithm proves to be highly effective in the task
                  of solving letter-substitution decipherment
                  problems. </p> ",
  download = "http://ftp.cs.toronto.edu/pub/gh/Corlett-MSc-2011.pdf"
}





