/* * Copyright (c) 2005 Kanru Chen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ using System; using System.IO; using System.Text; using Nemerle.Utility; namespace KC { type StringPair = int * int; type TransitionPair = StringPair * State; public class State { class TransitionNode { public this (k: char, v: TransitionPair) { Key = k; Value = v; } public mutable Key : char; public mutable Value : TransitionPair; public mutable Next : TransitionNode = null; } mutable transitions : TransitionNode; mutable count = 0; [Accessor (flags=WantSetter)] mutable s_link : State; public Item[a: char] : TransitionPair { get { def ret (t) { match (t) { | null => ((0, 0), null); | _ when t.Key == a => t.Value; | _ => ret (t.Next); } } ret (transitions); } set { ret : { if (transitions == null) { transitions = TransitionNode (a, value); count++; } else { mutable last : TransitionNode = null; for (mutable i = transitions; i != null; i = i.Next) { when (i.Key == a) { i.Value = value; ret (); } last = i; } last.Next = TransitionNode (a, value); count++; } } } } public Has (a: char) : bool { def ret (t) { match (t) { | null => false; | _ when t.Key == a => true; | _ => ret (t.Next); } } ret (transitions); } public Transitions : array [char] { get { if (transitions == null) null; else { def a = array (count); def build(i, t) { unless (t == null) { a[i] = t.Key; build(i+1, t.Next); } } build(0, transitions); a; } } } } public class SuffixTree { inf = Int32.MaxValue; [Accessor] text : StringBuilder = StringBuilder (); root : State = State (); bottom : State = State (); mutable i : int; mutable k : int; mutable working : State; public this (input: string) { init (); def do_build () { unless (i >= input.Length) { Append (input[i]); do_build (); } } do_build (); } public this () { init (); } init () : void { root.SLink = bottom; working = root; k = 0; i = 0; } public Append (t: char) : void { _ = text.Append (t); bottom[text[i]] = ((i, i), root); def (s', k') = UpDate (working, k, i); def (s', k') = Canonize (s', k', i); working = s'; k = k'; i = i + 1; } UpDate (s: State, k: int, i: int) : State * int { def do_update (o, s', k') { def (is_end_point, r) = TestAndSplit (s', k', i-1, i); if (is_end_point) { unless (o == root: object) o.SLink = s'; (s', k'); } else { r[text[i]] = ((i, inf), State()); unless (o == root: object) o.SLink = r; def (s', k') = Canonize (s'.SLink, k', i-1); do_update (r, s', k'); } } do_update (root, s, k); } TestAndSplit (s: State, k: int, p: int, t: int) : bool * State { if (k <= p) { def ((k', p'), s') = s[text[k]]; if (text[t] == text[k' + p - k + 1]) (true, s); else { def r = State (); s[text[k]] = ((k', k'+p-k), r); r[text[k'+p-k+1]] = ((k'+p-k+1, p'), s'); (false, r); } } else { (s.Has(text[t]), s); } } Canonize (mutable s: State, mutable k: int, p: int) : State * int { if (p < k) (s, k); else { mutable s' :State = null; mutable k' = 0; mutable p' = 0; ((k', p'), s') = s[text[k]]; while (p'-k' <= p-k) { k += p'-k' + 1; s = s'; when (k <= p) { ((k', p'), s') = s[text[k]]; } } (s, k); } } ShowTree (s: State, prefix: string) : void { def trans = s.Transitions; def text = text.ToString (); when (trans != null) { for (mutable i = 0; i < trans.Length; i++) { def ((k, p), s') = s[trans[i]]; def p = if (p == inf) text.Length - k else p - k + 1; match (i) { | x when x == trans.Length-1 => Console.WriteLine ("{0}`-- {1}", prefix, text.Substring(k, p)); ShowTree (s', prefix + " "); | _ => Console.WriteLine ("{0}|-- {1}", prefix, text.Substring(k, p)); ShowTree (s', prefix + "| "); } } } } public PrintTree () : void { Console.WriteLine ("."); ShowTree (root, ""); } CountStates (s: State) : int { def trans = s.Transitions; match (trans) { | x when x != null => mutable count = trans.Length; for (mutable i = 0; i < trans.Length; i++) { def ((_, _), s') = s[trans[i]]; count += CountStates (s'); } count; | _ => -1; } } public PrintStats () : void { Console.WriteLine ("Have {0} internal nodes", CountStates (root) + 1); } } public module T { public Main (args: array [string]) : void { match (args.Length) { | x when (x >= 2 && args[0] == "-f") => def reader = StreamReader (args[1], Text.ASCIIEncoding ()); def st = SuffixTree (); def do_build () { def t = reader.Read (); when (t != -1) { def x = t :> char; when ( (x <= 'z' && x >= 'a') || (x <= 'Z' && x >= 'A') || (x <= '9' && x >= '0') ) { st.Append (x); } do_build (); } } do_build (); st.Append ('#'); st.PrintStats (); | x when x == 1 => def st = SuffixTree (args[0] + "#"); st.PrintTree (); st.PrintStats (); | _ => Console.WriteLine ("Usage: suffix [string|-f filename]"); } } } }