(*                 latinus.ml

    This file is part of COLLATINUS

    COLLATINUS is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    COLLATINUS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with COLLATINUS; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    (c) Yves Ouvrard, Angoulme, 2005 - 2006

 ----------------------------------------------------------------------------

Bogues
  - Ajouter l'acc. neutre sing dans la morpho des
    formes canoniques.

Erreurs dans lemmata 
  - Certaines dsinences de eo (fut. ant. par ex) ne sont pas 
    dans lemmata

Avertissement
  - Le fichier lemmata utilis par cette version ocaml
    bien que son format soit strictement le mme,
    a quelques diffrence avec celui de la version Python.
    En particulier, les entres sum et eo sont ajoutes
    directement dans le code, et ne figurent donc pas
    dans lemmata.

A faire 
  - Implmenter toutes les fonctionnalits : lemmatisation
    par lots, flexion franaise, synthse des formes,
    tableaux de flexion.

                                  Moydans, 12-19 aot 2005
                                  Angoulme, 6 fvrier 2006
*)

(* Dfinition des paramtres morpho *)
let modeles = [| 
      "uita"; "amicus"; "puer"; "ager"; "templum";        (*  0 - 4  *)
      "miles"; "ciuis"; "corpus"; "mare"; "manus";        (*  5 - 9  *)
      "res"; "bonus"; "miser"; "pulcher"; "fortis";       (* 10 - 14 *)
      "uetus"; "acer"; "amo"; "moneo"; "lego";            (* 15 - 19 *)
      "capio"; "audio"; "sum"; "eo"; "imitor";            (* 20 - 24 *)
      "uereor"; "sequor"; "patior"; "potior"; "pronoms";  (* 25 - 29 *) 
      "invaria" |];;

let cass = [|
    ""; "nominatif"; "vocatif"; "accusatif"; "gnitif"; "datif"; "ablatif"|];;
let genres = [| ""; "masculin"; "fminin"; "neutre" |];;
let nombres = [| ""; "singulier"; "pluriel" |];;
let personnes = [| ""; "1re"; "2me"; "3me"|];;
let degres = [| ""; "positif"; "comparatif"; "superlatif" |];;
let tempss = [|
    ""; "prsent"; "futur"; "imparfait"; "parfait";
      "futur antrieur"; "plus-que-parfait"|];;
let modes = [|
    ""; "indicatif"; "subjonctif"; "impratif"; "infinitif";
     "participe"; "grondif"; "adjectif verbal" |];;
let voixs = [| ""; "actif"; "passif" |];;

(* Quelques fonctions utiles 
   1. ini c n donne de donne la chaine c ampute 
   de n caractres  droite *)
let ini c n = String.sub c 0 ((String.length c) - n);;

(* 2. jviu transforme la graphie ramiste en graphie ancienne *)
let jviu f =
   let ji = Str.regexp "j" 
   and vu = Str.regexp "v" in
   let tmp = Str.global_replace ji "i" f in
   let tmp = Str.global_replace vu "u" tmp in
   tmp;;

                    (**** CLASSES ****)
(* Classe des dsinences *)
class tdes l =
    object (self)
    val mutable gr = ""
    val mutable c = ""
    val mutable g = "" 
    val mutable n = "" 
    val mutable d = ""
    val mutable p = "" 
    val mutable t =  ""
    val mutable m =  ""
    val mutable v =  ""
    val mutable mdl = ""
    val mutable radnum = ""
    method graphie = gr
    method numero = int_of_string radnum
    method modele = int_of_string mdl
    method cas = cass.(int_of_string c)
    method genre = genres.(int_of_string g)
    method personne = personnes.(int_of_string p)
    method nombre = nombres.(int_of_string n)
    method degre = degres.(int_of_string d)
    method temps = tempss.(int_of_string t)
    method mode = modes.(int_of_string m)
    method voix = voixs.(int_of_string v)
    method morpho = 
      String.concat " " [ 
         self#cas; self#genre; self#personne; self#nombre; 
         self#degre; self#temps; self#mode; self#voix]
       
    initializer
        (* graph|casus|genus|numerus|gradus|persona|tempus|modus|uox|paradigma|radix_num *)
        let tabula = Str.split (Str.regexp "|") l in
        gr <- List.hd tabula;
        let tabula = List.tl tabula in
        c <-     List.hd tabula;
        let tabula = List.tl tabula in
        g  <-  List.hd tabula;
        let tabula = List.tl tabula in
        n <-  List.hd tabula;
        let tabula = List.tl tabula in
        d <-   List.hd tabula;
        let tabula = List.tl tabula in
        p <-  List.hd tabula;
        let tabula = List.tl tabula in
        t <-  List.hd tabula;
        let tabula = List.tl tabula in
        m <-    List.hd tabula;
        let tabula = List.tl tabula in
        v <-    List.hd tabula;
        let tabula = List.tl tabula in
        mdl <-  List.hd tabula;
        let tabula = List.tl tabula in
        radnum  <- List.hd tabula 
    end;;

(* Classe des irrguliers *)
class tirr l =
   object (self)
  (* maximis|magnus|5|1|2|0|0|0|0|0 *)
    val mutable gr = ""
    val mutable k = ""
    val mutable c = ""
    val mutable g = "" 
    val mutable n = "" 
    val mutable d = ""
    val mutable p = "" 
    val mutable t =  ""
    val mutable m =  ""
    val mutable v =  ""
    method graphie = gr
    method canon = k
    method cas = cass.(int_of_string c)
    method genre = genres.(int_of_string g)
    method nombre = nombres.(int_of_string n)
    method degre = degres.(int_of_string d)
    method personne = personnes.(int_of_string p)
    method temps = tempss.(int_of_string t)
    method mode = modes.(int_of_string m)
    method voix = voixs.(int_of_string v)
    method morpho = String.concat " " [
        self#cas; self#genre; self#degre; self#personne; 
        self#nombre; self#temps; self#mode; self#voix]
    initializer
        let tabula = Str.split (Str.regexp "|") l in
        gr <- List.hd tabula;
        let tabula = List.tl tabula in
        k <- List.hd tabula;
        let tabula = List.tl tabula in
        c <-     List.hd tabula;
        let tabula = List.tl tabula in
        g  <-  List.hd tabula;
        let tabula = List.tl tabula in
        n <-  List.hd tabula;
        let tabula = List.tl tabula in
        d <-   List.hd tabula;
        let tabula = List.tl tabula in
        p <-  List.hd tabula;
        let tabula = List.tl tabula in
        t  <-  List.hd tabula;
        let tabula = List.tl tabula in
        m <-    List.hd tabula;
        let tabula = List.tl tabula in
        v <-    List.hd tabula;
   end;;

(* classe des radicaux *)
class tradix k m n =
   object (self)
   val can = k
   val mo = m
   val num = n
   method canon = can 
   method modele = mo
   method numero = num
   method doc = string_of_int num ^ can ^ ", " ^ string_of_int mo
   end;;

(* Classe des entres *)
class tentree l =
    object (self)
       val mutable k = ""
       val mutable p = 0 
       val mutable rp = ""
       val mutable rs = ""
       val mutable g = ""
       method index = k
       method canon = Str.replace_first (Str.regexp " (2)$") "" k
       method cf nk ng = k <- nk; g <- ng
       method modele = p
       method rperfectum = rp
       method rsupinum = rs
       method txt = g
       method confer = Str.string_match (Str.regexp "cf. ") g 0
       method radix = 
          (* nettoyer le (2) *)
          let ka = Str.replace_first (Str.regexp " (2)$") "" k in
          match p with 
            |5|6|7|8 -> ka 
            |0|17|19 -> ini ka 1 
            |1|2|3|4|9|10|11|12|13|14|15|18|20|21|23|24|26 -> ini ka 2 
            |16 -> (ini ka 2) ^ "r"
            |22|25|27|28 -> ini ka 3
            |_ -> ""

      method desK =
       (* Fournit la dsinence canonique du lemme de modle m.           
       Cette fonction n'est videmment pas applicable pour 
       les modles sans dsinence canonique stable (essentiellement la 
       troisime dclinaison des noms et adjectifs). *)
      match p with
         | 0 -> "a"
         |1|9|11 -> "us"
         |2|3|12|13 -> "er"
         |4 -> "um"
         |10 -> "es"
         |17|19 -> "o"
         |18|23 -> "eo"
         |20|21 -> "io"
         |22 -> "sum"
         |24|26 -> "or"
         |25 -> "eor"
         |_ -> "ior"

      method est_neutre =
         try
         Str.search_forward (Str.regexp ", n. ") g 0 < 8  
         with Not_found -> false

      method categorie =
         if p < 11 then "n." 
         else if p < 17 then "adj."
         else if p < 29 then "v."
         else if p = 29 then "pr."
         else "inv"

      method morphoK = 
       (* Donne, d'aprs le modle p(aradigme), la morpho canonique de
         l'entre *)
       if (p < 11) && self#est_neutre then "nominatif singulier\n    accusatif singulier"
       else if p < 11 then "nominatif singulier"
       else if p < 17 then "nominatif masculin singulier"
       else if p < 29 then "1re singulier prsent indicatif actif"
       else ""

      (*method doc = self#categorie ^ " " ^ k ^ ", " ^ g *)
      method doc = k ^ ", " ^ g
      method docr = k ^ " paradigma " ^ string_of_int p ^ " " ^ g 
          ^ "\n  radices :" ^ self#radix ^ ":" ^ self#rperfectum 
          ^ ":" ^ self#rsupinum ^ "-"
      method doc_latex = Printf.sprintf "\\item \\textbf{%s}, %s" k g
      method doc_html  = Printf.sprintf "<li><b>%s</b>, %s" k g
      method dok modus =
         match modus with
           |"txt" -> self#doc
           |"html" -> self#doc_html
           |"tex" -> self#doc_latex
           |_ -> "?"

      initializer
          let tabula = Str.split (Str.regexp "|") l in
          k <- List.hd tabula; 
          let tabula = List.tl tabula in
          p <- int_of_string (List.hd tabula);
          let tabula = List.tl tabula in
          rp <- List.hd tabula;
          let tabula = List.tl tabula in
          rs <- List.hd tabula;
          let tabula = List.tl tabula in
          g <- List.hd tabula
    end;;


(*************************************************************)
(*                                                           *)
(*                    Lecture des donnes                    *)   
(*                                                           *)
(*************************************************************)

(* Dfinition du fichier de donnes *)
let capsam = "lemmata.fr";;

print_string "lexicum... ";;
  (*let capsa = open_in "lemmata"*)
let capsa = open_in capsam
  and incipit_d = "---desinentiae---"
  and incipit_i = "---irregulares---";;
let lexicum = Hashtbl.create 10000;;
let radices = Hashtbl.create 10000;;

(* 1. entres et radicaux *)
(* radices_adde clate les radicaux spars
   par des virgules, et les intgre aux tables *)
let radices_adde e r n =
   let l = Str.split (Str.regexp ",") r in
      List.iter (fun l ->
            let rad = new tradix e#index e#modele n
            in Hashtbl.add radices l rad
         )l;;

(* Radicaux nuls de sum et eo *)
(* Problme de la langue cible : il faudra renoncer  cette solution. *)
let lsum = "sum|22|fu|fut|es, esse, fui : tre ; en tte de phrase : il y a"
  in let entree = new tentree lsum
  in (Hashtbl.add lexicum "sum" entree;
     radices_adde entree "fu" 2);;
let ri = new tradix "sum" 22 1 
  in Hashtbl.add radices "" ri;;

let entree = new tentree "eo|23|i|it|is, ire, iui, itum : aller"
  in (Hashtbl.add lexicum entree#index entree;
      radices_adde entree "i,iu" 2;
      radices_adde entree "it" 3);;
let ri = new tradix "eo" 23 1 
  in Hashtbl.add radices "" ri;;

(* lecture ligne  ligne du fichier lemmata *)
let d = ref true in 
   while !d do 
       let l = input_line capsa in
       d := (l <> incipit_d);
       if !d 
          then let entree = new tentree l 
          in let r = entree#radix
          in Hashtbl.add lexicum entree#index entree;
          if r > "" then radices_adde entree r 1 ;
          let r = entree#rperfectum
          in if r > "" then radices_adde entree r 2;
          let r = entree#rsupinum
          in if r> "" then radices_adde entree r 3
       done;;
print_int (Hashtbl.length lexicum);;
print_newline();;

(* 2. dsinences *)
let ldes = Hashtbl.create 2000;;
print_string "desinentiae... ";;
let d = ref true in
   while !d do  
      let l = input_line capsa in
      d := (l <> incipit_i);
      if !d 
          then let des = new tdes l 
          in Hashtbl.add ldes des#graphie des 
      done;;
print_int (Hashtbl.length ldes);;
print_newline();;

(* 3. irrguliers *)
let lirr = Hashtbl.create 200;;
print_string "irregulares...";;
try
   while true do 
       let l = input_line capsa in
       let irr = new tirr l in 
       Hashtbl.add lirr irr#graphie irr 
   done with 
       | End_of_file  -> ();;
print_int (Hashtbl.length lirr);;
print_newline();;
print_string "radices : ";;
print_int (Hashtbl.length radices);;
print_newline ();;

(* correction des renvois cf. *)
Hashtbl.iter (fun c v ->
    if v#confer 
        then let entreeR = Str.global_replace (Str.regexp "cf. ") "" v#txt in 
             let entreeR = Str.global_replace (Str.regexp "\n$") "" entreeR  in 
             try
                let kcf = Hashtbl.find lexicum entreeR in 
                v#cf kcf#index kcf#txt
             with Not_found -> ()
   ) lexicum;;

(* mise en fonction de la lecture du fichier lemmata *)   
let lemmata_lege f =
    Hashtbl.clear lexicum;
    Hashtbl.clear radices;
    close_in capsa;
    let capsa = open_in f in
    let d = ref true in 
       while !d do 
           let l = input_line capsa in
           d := (l <> incipit_d);
           if !d 
              then let entree = new tentree l 
              in let r = entree#radix
              in Hashtbl.add lexicum entree#index entree;
              if r > "" then radices_adde entree r 1 ;
              let r = entree#rperfectum
              in if r > "" then radices_adde entree r 2;
              let r = entree#rsupinum
              in if r> "" then radices_adde entree r 3
       done;;

(* consultation du dictionnaire *)
let dico l =
   try
      let entree = Hashtbl.find lexicum l in
      Printf.printf "%s\n" entree#docr
   with Not_found -> ();;

let dictionnaire () =
   let repet = ref true in
   while !repet do
      print_string "uerbum : ";
      let uerbum = read_line () in
      if uerbum > "" then dico uerbum
      else repet := false
      done;;

class tlemmes =
   object (self)
     val liste = Hashtbl.create 2
     val alin = "\n"
     val mutable modus = "txt"

     method liste = liste
     method modus = modus
     method set_modus m = modus <- m
     method vide = Hashtbl.clear liste
     method adeja f m =
         let ltmp = Hashtbl.find_all liste f 
         in List.mem m ltmp
     method ajoute l m = 
         if not (self#adeja l m) then
            Hashtbl.add liste l m
     method graphie =
          let gr = ref "" in
          Hashtbl.iter (fun c v ->
             if v > " " then gr:=!gr^v^alin) liste; 
            !gr
     method graphie_frq frq =            
          (* filtre les rsultats en fonction de la frquence f d'apparition moyenne
             dans la langue latine. Cf module frequences.ml *)
          let gr = ref "" in
          Hashtbl.iter (fun c v ->
             if v > " " & Frequences.raritas c >= frq
                 then gr:=!gr^v^alin) liste; 
            !gr
     method affiche =
        Hashtbl.iter (fun i v ->
           let lin = i ^ v ^alin in
           print_string lin) liste
   end;;
let lemmes = new tlemmes;;

class tanalyses =
   object (self)
     val liste = Hashtbl.create 2
     val alin = "\n"
     val retrait = "    "
     method vide = Hashtbl.clear liste
     method napa f m =
        try
        let morphos = Hashtbl.find liste f in
            let lin = Str.global_replace (Str.regexp "[^a-zA-Z]+") "" morphos in
            let _ = Str.search_forward (Str.regexp_string lin) m 0 in true 
        with Not_found -> false 
     method ajoute l m = 
        try
        let morphos = Hashtbl.find liste l in
             let nmorphos = morphos ^ alin ^ retrait ^ m in
             Hashtbl.replace liste l nmorphos
        with Not_found ->
            let nmorphos = retrait ^ m in
            Hashtbl.add liste l nmorphos 
     method graphie =
          let gr = ref "" in
          Hashtbl.iter (fun i v ->
             gr:=!gr^i^alin^v^alin) liste; 
          !gr
     method affiche =
        Hashtbl.iter (fun i v ->
           let lin = i ^ alin ^ v ^alin in
           print_string lin) liste
   end;;
let analyses = new tanalyses;;

let analyse f =
   (* lemmatisation et analyse morpho de la forme f *)
   analyses#vide;
   (* chercher les formes canoniques *)
   try
      let entrees = Hashtbl.find_all lexicum f in
         List.iter (fun ek -> 
               analyses#ajoute ek#doc ek#morphoK
            (* Printf.printf "%s %s \n" ek#doc ek#morphoK *)
            ) entrees;
   (* toutes les possibilits radical.dsinence *)
   let l = String.length f in
   for i = 0 to l-1 do
      let r = String.sub f 0 i in
      let d = String.sub f i (l-i) in
      let di = Hashtbl.find_all ldes d in
      List.iter (fun dil ->
         let ri = Hashtbl.find_all radices r in 
         List.iter ( fun ril -> 
             if ril#numero = dil#numero && ril#modele = dil#modele then 
             let e = Hashtbl.find lexicum ril#canon in
             analyses#ajoute e#doc dil#morpho
             (* Printf.printf "%s.%s : %s %s\n" r d e#doc dil#morpho *)
         ) ri
    ) di
    done;
   (* chercher dans les irrguliers *)
   let irrs = Hashtbl.find_all lirr f in
         List.iter (fun irr ->
            let k = irr#canon in
            let e = Hashtbl.find lexicum k in
            analyses#ajoute e#doc irr#morpho
            (* Printf.printf "%s %s\n" e#doc irr#morpho *)
            ) irrs;
   (* chec *)
   if analyses#graphie = "" then analyses#ajoute f "?";
   with Not_found -> ();;

let lemmatise f = 
   (* lemmatisation et analyse morpho de la forme f *)
   lemmes#vide;
   lemmes#ajoute f "";
   (* chercher les formes canoniques *)
   try
      let entrees = Hashtbl.find_all lexicum f in
      List.iter (fun ek -> lemmes#ajoute f (ek#dok lemmes#modus)) entrees;

   (* toutes les possibilits radical.dsinence *)
   let l = String.length f in
   for i = 0 to l-1 do
      let r = String.sub f 0 i in
      let d = String.sub f i (l-i) in
      let di = Hashtbl.find_all ldes d in
      List.iter (fun dil ->
         let ri = Hashtbl.find_all radices r in 
         List.iter ( fun ril -> 
             if ril#numero = dil#numero && ril#modele = dil#modele then 
             let e = Hashtbl.find lexicum ril#canon in
             lemmes#ajoute f (e#dok lemmes#modus);
         ) ri
    ) di
    done;
   (* chercher dans les irrguliers *)
   let irrs = Hashtbl.find_all lirr f in
         List.iter (fun irr ->
            let k = irr#canon in
            let e = Hashtbl.find lexicum k in
              lemmes#ajoute f (e#dok lemmes#modus);
            ) irrs
   with Not_found -> (lemmes#ajoute f "?");;

let rec remove_doublons = function
    [] -> []
  | [e] -> [e]
  | a::b::rest -> (
      if a=b then remove_doublons (b::rest)
      else (a::(remove_doublons (b::rest))))

let sort_and_remove_doublons l =
  remove_doublons (Sort.list (<) l )

let analyse_u f =
   let reditus = ref "" in
   let u = jviu f in
   begin
       analyse u;
       reditus:= !reditus^analyses#graphie;
       (* majuscules *)
       if String.capitalize u = u 
           then begin
              let min = String.lowercase u in
               analyse min;
               reditus:= !reditus^analyses#graphie;
           end;
       (* -que et -ue *)
       let insuffx = Str.replace_first (Str.regexp "q?ue$") "" u in
           if insuffx != u then 
               begin
                   analyse insuffx;
                   reditus:= !reditus^analyses#graphie;
                   (*
                   let min = String.lowercase insuffx in
                   if min != insuffx then 
                      begin
                          lemmatise min;
                          reditus:= !reditus^lemmes#graphie;
                      end;
                   *)
               end;
   end;
   !reditus;;

let lemmatise_u f =
   (* lemmatise la forme f *)
   let reditus = ref "" in
   begin
       let u = jviu f in
       lemmatise u ;
       reditus:= !reditus^lemmes#graphie;
       let min = String.lowercase u in
       if min != u then 
           begin
               lemmatise min ;
               reditus:= !reditus^lemmes#graphie;
           end;
       (* -que et -ue *)
       let insuffx = Str.replace_first (Str.regexp "q?ue$") "" u in
           if insuffx != u then 
               begin
                   lemmatise insuffx ;
                   reditus:= !reditus^lemmes#graphie;
                   (*
                   let min = String.lowercase insuffx in
                   if min != insuffx then 
                      begin
                          lemmatise min;
                          reditus:= !reditus^lemmes#graphie;
                      end;
                   *)
               end;
   end;
   let l = Str.split (Str.regexp "[\n]+") !reditus in
   let txt = sort_and_remove_doublons l in
   String.concat "\n" txt;;

let lemmatise_texte t =
   (* Lemmatisation du texte t *)
   let liste = Str.split (Str.regexp "[^a-zA-Z]+") t in
       let reditus = ref "" in
           List.iter (fun u ->
               let u = jviu u in
               lemmatise u;
               reditus:=!reditus^lemmes#graphie;
               let min = String.lowercase u in
               if min != u then 
                   begin
                      lemmatise min;
                      reditus:= !reditus^lemmes#graphie;
                   end;
           (* -que et -ue *)
           let insuffx = Str.replace_first (Str.regexp "q?ue$") "" u in
               if insuffx != u then 
                   begin
                   lemmatise insuffx;
                   reditus:= !reditus^lemmes#graphie;
                   (*
                   let min = String.lowercase insuffx in
                   if min != insuffx then 
                      begin
                          lemmatise min;
                          reditus:= !reditus^lemmes#graphie;
                      end;
                   *)
                   end;
           ) liste;
   let l = Str.split (Str.regexp "[\n]+") !reditus in
   let txt = sort_and_remove_doublons l in
   String.concat "\n" txt;;

(*   
let lemmatise_txt t = 
   (* Lemmatisation du texte t *)
   let liste = Str.split (Str.regexp "[^a-zA-Z]+") t in
   let rtabula = Hashtbl.create 500 in
   List.iter (fun u -> (
        let u = jviu u in
        lemmatise u;
        Hashtbl.iter (fun c v -> Hashtbl.replace rtabula c v) lemmes#liste;
        let min = String.lowercase u in
        if min != u then (
            lemmatise min;
            Hashtbl.iter (fun c v -> Hashtbl.replace rtabula c v) lemmes#liste);
        (* -que et -ue *)
        let sinesuffx = Str.replace_first (Str.regexp "q?ue$") "" u in
        if sinesuffx != u then (
            lemmatise sinesuffx;
            Hashtbl.iter (fun c v -> Hashtbl.replace rtabula c v) lemmes#liste)
        ))liste;
   rtabula;;
*)

let lemmatise_txt_frq t f =
   (* Lemmatisation du texte t *)
   let liste = Str.split (Str.regexp "[^a-zA-Z]+") t in
       let reditus = ref "" in
           List.iter (fun u ->
               let u = jviu u in
               lemmatise u;
               reditus:=!reditus^(lemmes#graphie_frq f) ;
               let min = String.lowercase u in
               if min != u then 
                   begin
                      lemmatise min;
                      reditus:= !reditus^(lemmes#graphie_frq f);
                   end;
           (* -que et -ue *)
           let insuffx = Str.replace_first (Str.regexp "q?ue$") "" u in
               if insuffx != u then 
                   begin
                   lemmatise insuffx;
                   reditus:= !reditus^(lemmes#graphie_frq f);
                   (*
                   let min = String.lowercase insuffx in
                   if min != insuffx then 
                      begin
                          lemmatise min;
                          reditus:= !reditus^lemmes#graphie;
                      end;
                   *)
                   end;
           ) liste;
   let l = Str.split (Str.regexp "[\n]+") !reditus in
   let txt = sort_and_remove_doublons l in
   String.concat "\n" txt;;

let lemmatise_f capsa =
   (* Lemmatisation du fichier capsa un mot par ligne *)
   print_string ("lemmatisation du fichier" ^ capsa ^ "\n");
   let c = open_in capsa in
   try
      while true do
         let l = input_line c in
         let n = String.length l in
         if n > 0 then 
             let liste = Str.split (Str.regexp "[^a-z]+") l in
             List.iter (fun u ->
                print_string ("\n"^lemmatise_u u)
             ) liste
      done
   with End_of_file -> close_in c;;

let interactif_a () =
   let repet = ref true in
   while !repet do
      print_string "---\n";
      print_string "forme : ";
      let forme = read_line () in
      let forme = jviu forme in
      if forme > "" then (
         analyse forme; analyses#affiche)
      else repet := false
      done;;

let interactif_l () =
   let repet = ref true in
   while !repet do
      (* print_string "---\n"; *)
      print_string "\nforme : ";
      let forme = read_line () in
      let forme = jviu forme in
      if forme > "" then (
         lemmatise forme; 
         print_string lemmes#graphie)
      else repet := false
      done;;

let auxilium =
     "latinus [-d | -i | -l | -t textus] \n" ^
     "  -d : dictionnaire \n" ^ 
     "  -i : analyse morpho interactive (par dfaut) \n" ^
     "  -l : lemmatisation interactive \n" ^
     "  -t textus : affiche la lemmatisation du texte textus sur la sortie standart \n" ^
     "  -h, ou toute autre option : cette aide.\n";;

if Array.length Sys.argv > 1 then
        match Array.get Sys.argv 1 with
              |"-d" -> dictionnaire () 
              |"-i" -> interactif_a () (* analyse morpho interactive *)
              |"-l" -> interactif_l () (* lemmatisation interactive *)
              |"-t" -> let c = Array.get Sys.argv 2 in lemmatise_f c (* lemmatiser un texte *)
              |_ -> print_string auxilium (* aide *)

