(* model-func.sml *) (* Copyright (C) 2008 Alley Stoughton This file is part of crypto, a cryptogram encoder/decoder. See the file COPYING.txt for copying and usage restrictions *) (* a functor that takes in a structure Data of signature DATA and returns a structure of signature MODEL where type SymLinOrd.elem = Data.SymLinOrd.elem and type pds = Data.pds and type SymSet.set = Data.SymSet.set and type SymLexicon.lexicon = Data.SymLexicon.lexicon where the declarations of DATA are made by opening Data *) functor ModelFunc(structure Data : DATA) :> MODEL where type SymLinOrd.elem = Data.SymLinOrd.elem and type pds = Data.pds and type SymSet.set = Data.SymSet.set and type SymLexicon.lexicon = Data.SymLexicon.lexicon = struct open Data (* model data *) type md = Random.seed fun init() = Random.makeFromTime() (* val symsWord : word -> sym_set compute the symbols of a word *) val symsWord = SymSet.fromList (* val symsLine xs : line -> sym_set compute the symbols of a line *) fun symsLine nil = SymSet.fromList[] | symsLine (x :: xs) = SymSet.union(symsWord x, symsLine xs) fun symsMsg nil = SymSet.fromList[] | symsMsg (x :: xs) = SymSet.union(symsLine x, symsMsg xs) val toPDM = map(map(map Old)) (* val pdsToSym : pds -> sym convert a totally decoded partially decoded symbol to a symbol, by removing the constructor New *) fun pdsToSym(Old _) = raise Fail "cannot happen" | pdsToSym(New x) = x (* val toMsg : pdm -> msg convert a totally decoded partially decoded message to a message, by removing every occurrence of New *) val toMsg = map(map(map pdsToSym)) (* val patternAtomPDS : pds -> SymLexicon.pat turn a partially decoded symbol into a pattern -- part of a pattern list expected by SymLexicon.matches; old symbols become wildcard patterns, and new symbols become literal patterns *) fun patternAtomPDS(Old x) = SymLexicon.Wild x | patternAtomPDS(New x) = SymLexicon.Lit x (* val patternPDW : pdw -> SymLexicon.pat list turn a partially decoded word into a pattern list of the sort expected by SymLexicon.matches; old symbols become wildcard patterns, and new symbols become literal patterns *) val patternPDW = map patternAtomPDS (* val patternsPDL : pdl -> SymLexicon.pat list list turn a partially decoded line into a list of pattern lists; see above for what happens to each pdw *) val patternsPDL = map patternPDW (* val patternsPDM : pdm -> SymLexicon.pat list list turn a partially decoded message into a list of pattern lists; see above for what happens to each pdw *) fun patternsPDM nil = nil | patternsPDM (x :: xs) = patternsPDL x @ patternsPDM xs (* val matches : sym_lexicon * SymLexicon.pat list list -> bool matches(patss, lex) tests whether every pattern list in patss matches (see SymLexicon.matches) at least one element of the set of words represented by lex *) fun matches(patss, lex) = List.all (fn pats => SymLexicon.matches(pats, lex)) patss (* val replacePDS : sym * sym -> pds -> pds replace an old symbol by a new one in a partially decoded symbol *) fun replacePDS (x, y) (Old z) = if SymLinOrd.compare(x, z) = EQUAL then New y else Old z | replacePDS _ (New z) = New z fun replace(x, y, pdm) = map (map(map(replacePDS(x, y)))) pdm (* val numOldOccsPDS : sym * pds -> int count the number of occurrences of a given old symbol in a partially decoded symbol *) fun numOldOccsPDS(x, Old y) = if SymLinOrd.compare(x, y) = EQUAL then 1 else 0 | numOldOccsPDS(_, New _) = 0 (* val numOldOccsPDW : sym * pdw -> int count the number of occurrences of a given old symbol in a partially decoded word *) fun numOldOccsPDW(x, [y]) = numOldOccsPDS(x, y) | numOldOccsPDW(x, y :: ys) = numOldOccsPDS(x, y) + numOldOccsPDW(x, ys) | numOldOccsPDW(x, nil) = raise Fail "cannot happen" (* val numOldOccsPDL : sym * pdl -> int count the number of occurrences of a given old symbol in a partially decoded line *) fun numOldOccsPDL(_, nil) = 0 | numOldOccsPDL(x, y :: ys) = numOldOccsPDW(x, y) + numOldOccsPDL(x, ys) (* val numOldOccsPDM : sym * pdm -> int count the number of occurrences of a given old symbol in a partially decoded message *) fun numOldOccsPDM(_, nil) = 0 | numOldOccsPDM(x, y :: ys) = numOldOccsPDL(x, y) + numOldOccsPDM(x, ys) (* val nextOld : Random.seed * pdm * sym_set -> Random.seed * sym in a call nextOld(seed, pdm, olds), we require that olds be the set of old symbols in pdm, and that olds be nonempty returns an old symbol that appears as frequently as any other old symbol in pdm, where ties are broken randomly; also returns the new random seed *) fun nextOld(seed, pdm, olds) = let val olds = SymSet.toList olds val freqs = map (fn a => (a, numOldOccsPDM(a, pdm))) olds (* val best : (sym * int)list -> (sym * int)list *) fun best [(a, n)] = [(a, n)] | best ((a, n) :: xs) = let val ys = best xs in case Int.compare(n, #2(hd ys)) of LESS => ys | EQUAL => (a, n) :: ys | GREATER => [(a, n)] end | best nil = raise Fail "cannot happen" val bests = best freqs val (seed, n) = Random.getInRange(seed, 0, length bests - 1) in (seed, #1(List.nth(bests, n))) end (* val insertAt : 'a * int * 'a list -> 'a list in a call insertAt(x, n, ys), we require that 0 <= n <= length ys insertAt(x, n, ys) inserts x after position n in ys (after position 0 is at the beginning; after position length ys is at the end) *) fun insertAt(x, _, nil) = [x] | insertAt(x, n, zs as y :: ys) = if n = 0 then x :: zs else y :: insertAt(x, n - 1, ys) (* val randomize : Random.seed * 'a list -> Random.seed * 'a list randomize(seed, xs) uses seed to randomize the order of xs, returning a new seed along with the randomly-ordered list *) fun randomize(seed, xs) = let (* val random : Random.seed * int * 'a list -> Random.seed * 'a list in a call random(seed, m, xs), m = length xs random(seed, m, xs) uses seed to randomize the order of xs, returning a new seed along with the randomly-ordered list *) fun random(seed, _, nil) = (seed, nil) | random(seed, m, x :: xs) = let val (seed, ys) = random(seed, m - 1, xs) val (seed, n) = Random.getInRange(seed, 0, m - 1) in (seed, insertAt(x, n, ys)) end in random(seed, length xs, xs) end datatype decodings = DecodingsNone | DecodingsUnique of msg | DecodingsMultiple (* val decods : 'a * ('a -> 'a * bool) * md * sym_lexicon * pdm * sym_set * sym_set -> 'a * md * decodings option in a call decodgs(ab, ca, md, lex, pdm, olds, news), we require that olds is the old symbols of pdm, news is the new symbols of pdm, and (symbols - news) is at least as big as olds ab and ca are used together in a single-threaded way; ab is the current abortable computation data; ca (check abort) takes in the current abortable computation data, and returns the next version of the abortable computation data, plus a boolean, which is true iff abortion of the computation is being requested; decodgs calls ca with the current abortable computation data at the beginning of each call; it aborts by returning (ab, md, NONE), where ab is the current abortable computation data and md is the current model data; it returns normally by returning a value of the form (ab, md, SOME v); the ca function should be fast, as it will be called many times a DECODING of pdm is a message msg such that: msg has the same shape as pdm; and msg agrees with pdm on news; and there is a bijection from olds to a subset of (symbols - news) that turns the rest of pdm into msg; and every word in msg is in the set represented by lex if decodgs learns that there are no decodings of pdm, then it returns (ab, md, SOME DecodingsNone) if decodgs learns that msg is the unique decoding of pdm, then it returns (ab, md, SOME(DecodingsUnique msg)) if decodgs learns that there are multiple decodings of pdm then it returns (ab, md, SOME DecodingsMultiple) *) fun decods(ab : 'a, ca, md, lex, pdm, olds, news) = let val (ab, b) = ca ab in if b then (ab, md, NONE) else if matches(patternsPDM pdm, lex) then if SymSet.size olds = 0 then (ab, md, SOME(DecodingsUnique(toMsg pdm))) else let val (md, a) = nextOld(md, pdm, olds) val olds = SymSet.minus(olds, SymSet.fromList[a]) val (md, reps) = randomize(md, SymSet.toList(SymSet.minus(symbols, news))) (* val try : 'a * md * sym list -> 'a * md * decodings option *) fun try(ab, md, nil) = (ab, md, SOME DecodingsNone) | try(ab, md, b :: bs) = let val pdm = replace(a, b, pdm) val news = SymSet.union(news, SymSet.fromList[b]) in case decods(ab, ca, md, lex, pdm, olds, news) of (ab, md, SOME DecodingsNone) => try(ab, md, bs) | (ab, md, SOME(DecodingsUnique msg)) => (case try(ab, md, bs) of (ab, md, SOME DecodingsNone) => (ab, md, SOME(DecodingsUnique msg)) | (ab, md, SOME _) => (ab, md, SOME DecodingsMultiple) | x => x) | x => x end in try(ab, md, reps) end else (ab, md, SOME DecodingsNone) end fun decodings(ab, ca, md, lex, msg) = decods(ab, ca, md, lex, toPDM msg, symsMsg msg, SymSet.fromList nil) (* val decodablePDS : pds * sym -> bool *) fun decodablePDS(Old _, _) = true | decodablePDS(New a, b) = SymLinOrd.compare(a, b) = EQUAL (* val decodablePDM : pdm * msg -> bool if pdm and msg have the same shape, then decodablePDM(pdm, msg) tests whether, for all symbols a, if New a appears at a given position in pdm, then a appears at the same position in msg *) val decodablePDM = ListPair.allEq(ListPair.allEq(ListPair.allEq decodablePDS)) (* val decodingOfOldPDS : sym * pds * sym -> sym option decodingOfOldPDS(a, x, c) returns SOME c, if x is Old a; otherwise, it returns NONE *) fun decodingOfOldPDS(a, Old b, c) = if SymLinOrd.compare(a, b) = EQUAL then SOME c else NONE | decodingOfOldPDS(_, New _, _) = NONE (* val decodingOfOldPDW : sym * pdw * word -> sym option in a call decodingOfOldPDW(a, xs, cs), xs and cs will have the same length decodingOfOldPDW(a, xs, cs) returns SOME c, if a appears as an old symbol in xs, and c is what appears in cs at the same position as the first old occurrence of a in xs; otherwise, it returns NONE *) fun decodingOfOldPDW (a, [x], [c]) = decodingOfOldPDS(a, x, c) | decodingOfOldPDW (a, x :: xs, c :: cs) = (case decodingOfOldPDS(a, x, c) of NONE => decodingOfOldPDW(a, xs, cs) | SOME d => SOME d) | decodingOfOldPDW _ = raise Fail "cannot happen" (* val decodingOfOldPDL : sym * pdl * line -> sym option in a call decodingOfOldPDL(a, xs, ys), xs and ys will have the same shape decodingOfOldPDL(a, xs, ys) returns SOME c, if a appears as an old symbol in xs, and c is what appears in ys at the same position as the first old occurrence of a in xs; otherwise, it returns NONE *) fun decodingOfOldPDL (a, nil, nil) = NONE | decodingOfOldPDL (a, x :: xs, y :: ys) = (case decodingOfOldPDW(a, x, y) of NONE => decodingOfOldPDL(a, xs, ys) | SOME d => SOME d) | decodingOfOldPDL _ = raise Fail "cannot happen" (* val decodingOfOldPDM : sym * pdm * msg -> sym option in a call decodingOfOldPDM(a, xs, ys), xs and ys will have the same shape decodingOfOldPDM(a, xs, ys) returns SOME c, if a appears as an old symbol in xs, and c is what appears in ys at the same position as the first old occurrence of a in xs; otherwise, it returns NONE *) fun decodingOfOldPDM (a, nil, nil) = NONE | decodingOfOldPDM (a, x :: xs, y :: ys) = (case decodingOfOldPDL(a, x, y) of NONE => decodingOfOldPDM(a, xs, ys) | SOME d => SOME d) | decodingOfOldPDM _ = raise Fail "cannot happen" datatype hint = HintDecoded | HintNotDecodable | HintReplace of sym * sym fun findHint(md, msg, pdm, olds, msg') = if decodablePDM(pdm, msg') then if SymSet.size olds = 0 then (md, HintDecoded) else let val (md, a) = nextOld(md, pdm, olds) in (md, HintReplace(a, valOf(decodingOfOldPDM(a, pdm, msg')))) end else (md, HintNotDecodable) (* val unknownWordsLine : sym_lexicon * int * line -> (int * word)list unknownWordsMsg(lex, n, xs) returns the words of xs that are not in the set represented by lex, where the words appear in the order they appear in msg, and are annotated with n *) fun unknownWordsLine(lex, _, nil) = nil | unknownWordsLine(lex, n, x :: xs) = if SymLexicon.matches(map SymLexicon.Lit x, lex) then unknownWordsLine(lex, n, xs) else (n, x) :: unknownWordsLine(lex, n, xs) (* val unknownWordsMsg : sym_lexicon * int * msg -> (int * word)list unknownWordsMsg(lex, n, xs) returns the words of xs that are not in the set represented by lex, where the words appear in the order they appear in xs, and are annotated with n + the numbers of the lines on which they occur (line numbers begin with 1) *) fun unknownWordsMsg(lex, _, nil) = nil | unknownWordsMsg(lex, n, x :: xs) = unknownWordsLine(lex, n + 1, x) @ unknownWordsMsg(lex, n + 1, xs) fun unknownWords(lex, xs) = unknownWordsMsg(lex, 0, xs) (* val enc : Random.seed * pdm * sym_set * sym_set -> Random.seed * msg in a call enc(seed, pdm, olds, news), we require that olds is the old symbols of pdm, news is the new symbols of pdm, and (symbols - news) is as least as big as olds in contrast to our usual interpretation of pdm, old symbols are ones that haven't been ENCODED yet, and new symbols are ones that have enc uses seed to generate a random bijection between olds and a subset of (symbols - news), in the process producing seed', and returns seed', along with the result of applying this bijection to pdm (the new symbols are left alone) *) fun enc(seed, pdm, olds, news) = if SymSet.size olds = 0 then (seed, toMsg pdm) else let val (a, olds) = case SymSet.toList olds of nil => raise Fail "cannot happen" | a :: olds => (a, SymSet.fromList olds) val avails = SymSet.minus(symbols, news) val (seed, n) = Random.getInRange(seed, 0, SymSet.size avails - 1) val b = List.nth(SymSet.toList avails, n) val news = SymSet.union(news, SymSet.fromList[b]) val pdm = replace(a, b, pdm) in enc(seed, pdm, olds, news) end fun encode(md, msg) = enc(md, toPDM msg, symsMsg msg, SymSet.fromList nil) end;