package iitb.cfilt.cpost.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.*;

import iitb.cfilt.cpost.*;
import iitb.cfilt.cpost.newstemmer.*;
import iitb.cfilt.cpost.crf.DataSequenceImpl;
import iitb.cfilt.cpost.crf.MorphToken;
import iitb.cfilt.cpost.ma.*;
import iitb.cfilt.cpost.vgi.*;

/**
 * Command-line driver that runs whitespace-separated tokens through the
 * stemmer, the morphological analyzer and the verb-group identifier, and
 * reports the integer produced by the identifier for each token.
 *
 * <p>The static pipeline components are only assigned in {@link #main(String[])};
 * calling the {@code doVGI_*} methods before they are assigned fails with a
 * {@link NullPointerException}.
 */
public class TestVGI {
	private static Stemmer stemmer;
	private static VerbGroup4 vb;
	// Never read in this class; the instance is still created in main() because
	// its constructor may have side effects the analyzer relies on (TODO confirm).
	private static MorphologicalAnalyzerRuleReader MAR;
	private static MorphologicalAnalyzer ma;

	/** Stemmer output for the most recently processed sentence. */
	public static Vector<StemmedToken> stemmedTokens;
	/** Morphological analyzer output for the most recently processed sentence. */
	public static Vector<MorphologicallyAnalyzedToken> maTokens;

	/**
	 * Reads {@code inFile} line by line (decoded as UTF-8), treats every
	 * non-empty line as one sentence, and prints each token together with the
	 * value the verb-group identifier returned for it.
	 *
	 * <p>Any exception raised while reading or processing is printed as a stack
	 * trace and ends the processing of the file; it is not propagated.
	 *
	 * @param inFile path of the input file, one sentence per line, tokens
	 *               separated by single spaces
	 */
	public static void doVGI_forFile(String inFile){
		BufferedReader reader = null;
		try{
			reader = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), "UTF8"));
			String line;
			int sentenceNo = 1;
			while((line = reader.readLine()) != null) {
				if(line.length() == 0) {
					continue;
				}
				System.out.println ( "Sentence:" + sentenceNo + "\t" );
				// The counter was previously never advanced, so every sentence was labelled 1.
				sentenceNo++;
				String[] tokens = line.split(" ");
				int[] vgi = identifyVerbGroups(tokens);
				// Stop at the shorter of the two arrays so a length mismatch between the
				// identifier's result and the tokens cannot index past the end of tokens.
				int count = Math.min(vgi.length, tokens.length);
				for (int i = 0; i < count; i++) {
					System.out.print(tokens[i] + "- " + vgi[i] + "\t");
				}
				System.out.println("");
			}
		} catch(Exception e){
			e.printStackTrace();
		} finally {
			// Release the file handle whether or not processing succeeded.
			if(reader != null) {
				try {
					reader.close();
				} catch(java.io.IOException closeFailure) {
					closeFailure.printStackTrace();
				}
			}
		}
	}

	/**
	 * Runs the pipeline on a single sentence.
	 *
	 * @param sentence the sentence; when {@code flagTags} is {@code true} it is
	 *                 expected to consist of {@code word_[TAG]} items, otherwise
	 *                 it is used as given
	 * @param flagTags whether the {@code _[TAG]} annotations must be stripped
	 *                 before the sentence is split on single spaces
	 * @return the array returned by the verb-group identifier for the tokens of
	 *         the (possibly tag-stripped) sentence
	 */
	public static int[] doVGI_forSentence( String sentence, boolean flagTags ){
		String rawSentence = flagTags ? stripTags(sentence) : sentence;
		return identifyVerbGroups(rawSentence.split(" "));
	}

	/**
	 * Rebuilds a plain sentence from {@code word_[TAG]} items by keeping the
	 * text before {@code _[} of every {@code ]}-terminated segment.
	 */
	private static String stripTags(String sentence) {
		StringBuilder raw = new StringBuilder();
		String[] taggedWords = sentence.split("\\]");
		String token = "";
		for(int i = 0; i < taggedWords.length; i++) {
			if(taggedWords[i].contains("_")) {
				String[] parts = taggedWords[i].split("_\\[");
				// A segment that is exactly "_[" splits into an empty array.
				String word = parts.length == 0 ? "" : parts[0].trim();
				// Compare by content: reference comparison against "" is not reliable.
				token = word.length() == 0 ? "." : word;
			}
			// NOTE(review): a segment without '_' leaves token unchanged, so the
			// previous token is appended again; kept as in the original because
			// callers may depend on the resulting index alignment.
			if(token.length() != 0) {
				// NOTE(review): the leading space makes the later split(" ") yield an
				// empty first token; preserved for the same reason.
				raw.append(" ").append(token);
			}
		}
		return raw.toString();
	}

	/**
	 * Stems and analyzes the tokens, stores the intermediate results in
	 * {@link #stemmedTokens} and {@link #maTokens}, and returns the result of
	 * the verb-group identifier.
	 */
	private static int[] identifyVerbGroups(String[] tokens) {
		List<String> tokenView = Arrays.asList(tokens);
		Vector<String> tokenList = new Vector<String>(tokenView);
		stemmedTokens = stemmer.stem(tokenList);
		maTokens = ma.analyze(stemmedTokens);
		return vb.identifyVerbGroups1(maTokens);
	}

	/**
	 * Entry point: passes {@code args[0]} to {@code ConfigReader.read}, creates
	 * the pipeline components and processes the file named by {@code args[1]}.
	 *
	 * @param args {@code args[0]} is the argument for {@code ConfigReader.read},
	 *             {@code args[1]} the input file to process
	 */
	public static void main(String args[])
	{
		if(args.length < 2) {
			System.err.println("Usage: TestVGI <config-file> <input-file>");
			System.exit(1);
		}

		ConfigReader.read(args[0]);
		stemmer = new Stemmer();
		vb = new VerbGroup4();
		MAR = new MorphologicalAnalyzerRuleReader();
		ma = new MorphologicalAnalyzer();
		doVGI_forFile(args[1]);
	}
}
