(* model.sig *)

(* Copyright (C) 2008 Alley Stoughton

   This file is part of crypto, a cryptogram encoder/decoder.  See the
   file COPYING.txt for copying and usage restrictions *)

(* model - domain specific aspects - encoding and decoding *)

signature MODEL =
sig

(* basic symbol-based data

   NOTE(review): the types msg, sym, sym_set, pdm, sym_lexicon and word
   used below are not declared in this signature, so they presumably
   come from DATA -- confirm against the DATA signature *)

include DATA

(* data associated with the model; the functions of the model may use
   this data however they wish; the type is abstract, and each function
   that takes model data also returns (possibly new) model data, which
   the caller is expected to thread through to later calls *)

type md

(* create initial model data *)

val init : unit -> md

(* compute the symbols of a message *)

val symsMsg : msg -> sym_set

(* convert a msg to a pdm by labeling each symbol with Old *)

val toPDM : msg -> pdm

(* replace(a, b, pdm) returns the result of replacing all occurrences
   of Old a in pdm by New b *)

val replace : sym * sym * pdm -> pdm

(* the possible outcomes of a completed (non-aborted) search for the
   decodings of a message: none exist, exactly one exists (which is
   carried by the constructor), or more than one exists *)

datatype decodings = DecodingsNone
                   | DecodingsUnique of msg
                   | DecodingsMultiple

(* decodings is called with (ab, ca, md, lex, msg)

   ab and ca are intended to be used together, in a single-threaded
   way, to allow decodings's computation to be aborted:

     ab is the initial abortable computation data;

     ca (check abort) takes in the current abortable computation data,
     and returns the next version of the abortable computation data,
     plus a boolean, which is true iff abortion of the computation is
     being requested

   decodings calls ca with the current abortable computation data at
   the beginning of each step of its computation; the ca function
   should be fast, as it will be called many times

   decodings aborts by returning (ab, md, NONE), where ab is the
   current abortable computation data and md is new model data; it
   returns normally by returning a value of the form (ab, md, SOME v)

   if decodings learns that there are no decodings of msg, relative to
   the set of words represented by lex, then decodings returns
   (ab, md, SOME DecodingsNone)

   if decodings learns that msg' is the unique decoding of msg,
   relative to the set of words represented by lex, then decodings
   returns (ab, md, SOME(DecodingsUnique msg'))

   if decodings learns that there are multiple decodings of msg,
   relative to the set of words represented by lex, then decodings
   returns (ab, md, SOME DecodingsMultiple) *)

val decodings :
      'a * ('a -> 'a * bool) * md * sym_lexicon * msg ->
      'a * md * decodings option

(* the possible results of asking for a hint: the pdm is already
   decoded, the pdm cannot be decoded, or an old symbol should be
   replaced by the given new symbol *)

datatype hint = HintDecoded
              | HintNotDecodable
              | HintReplace of sym * sym

(* in a call findHint(md, msg, pdm, olds, msg'), we require that pdm be
   consistent with msg, that msg' be the unique decoding of msg, and
   that olds be the old symbols of pdm

   if pdm is not decodable, then findHint returns
   (md, HintNotDecodable), where md is new model data

   otherwise, if pdm is decoded, then findHint returns
   (md, HintDecoded)

   otherwise, findHint returns (md, HintReplace(a, b)), where a is an
   old symbol of pdm that occurs at least as often as any other old
   symbol, and b is a's decoding in msg' *)

val findHint : md * msg * pdm * sym_set * msg -> md * hint

(* unknownWords(lex, msg) returns the words of msg that are not in the
   set represented by lex, where the words appear in the order they
   appear in msg, and are annotated with the numbers of the lines on
   which they occur (line numbers begin with 1) *)

val unknownWords : sym_lexicon * msg -> (int * word)list

(* encode(md, msg) generates a random bijection from the symbols of msg
   to a subset of symbols, returning new model data plus the result of
   applying this bijection to msg *)

val encode : md * msg -> md * msg

end;