	
/* *************************************************************************************** */
/*Nikhilesh Sharma */

package iitb.cfilt.cpost.vgi;

	import iitb.cfilt.cpost.ConfigReader;
	import iitb.cfilt.cpost.UTFWriter;
	import iitb.cfilt.cpost.ma.*;
	import iitb.cfilt.cpost.newstemmer.*;

	import java.io.File;
import java.util.*;

	/**
	 * Rule-based verb group identifier.
	 *
	 * <p>Given the morphologically analyzed tokens of one sentence,
	 * {@link #identifyVerbGroups1(Vector)} returns one integer per token: 0 when the
	 * token was not marked as part of a verb group, otherwise a number counted from 1
	 * within the group the token was assigned to.
	 *
	 * <p>The rule tables are read through a {@code VerbGroupIdentifierRuleReader} that is
	 * stored in a static field and (re)created by every constructor call.
	 */
	public class VerbGroup4 {

		/** Rule tables consulted by the identifier; assigned in the constructor. */
		private static VerbGroupIdentifierRuleReader vgr;

		/**
		 * Writer opened on the "VGI.VGMarkingFile" path. It is never written to or
		 * closed in this class.
		 * NOTE(review): looks like a leftover; confirm before removing, since opening
		 * it may create the file.
		 */
		private static UTFWriter ob;

		/**
		 * Loads the verb group rules and opens a fresh marking file.
		 *
		 * <p>Any failure is reported on standard output / standard error and otherwise
		 * ignored, which leaves the static fields unset.
		 */
		public VerbGroup4() {
			try {
				vgr = new VerbGroupIdentifierRuleReader();
				File outfile = new File(ConfigReader.get("VGI.VGMarkingFile"));
				if (outfile.exists()) {
					// A File is only a path, so it stays usable after the delete.
					outfile.delete();
				}
				ob = new UTFWriter(outfile);
			} catch (Exception e) {
				System.out.println(e.toString());
				e.printStackTrace();
			}
		}

		/**
		 * Assigns verb group positions to the tokens of one sentence.
		 *
		 * <p>Tokens are scanned left to right. Tokens whose first analysis has category
		 * "SYM" are skipped. For a token whose ambiguity scheme contains "verb" or
		 * "verb_aux", the analyses are tried in order and the first one that is itself a
		 * verb or auxiliary decides what happens: the group is started, continued, ended
		 * or aborted. On abort the scan rewinds to the token where the current group
		 * started, clears that token's mark and resumes with the token after it.
		 *
		 * <p>Progress messages and the final marking are printed to standard output.
		 *
		 * @param matv analyzed tokens of the sentence, in sentence order
		 * @return an array parallel to {@code matv}; 0 means "not marked", a positive
		 *         value is the number given to the token inside its verb group
		 */
		public int [] identifyVerbGroups1(
				Vector<MorphologicallyAnalyzedToken> matv) {
			int[] vgi = new int[matv.size()];
			int currentVGstartedAt = 0;
			int vgNumber = 1;
			boolean continueVG = false;

			for (int i = 0; i < matv.size(); i++) {
				vgi[i] = 0;
				// Captured before any rewind/skip of i below; only used before i changes.
				MorphologicallyAnalyzedToken current = matv.get(i);
				String token = current.getToken();
				System.out.println("Working for " + token);

				boolean hasAnalyses = current.getStemmedOutputs() != null
						&& current.getStemmedOutputs().size() > 0;

				if (hasAnalyses
						&& current.getStemmedOutputs().get(0).getCategory()
								.trim().equals("SYM")) {
					vgi[i] = 0;
					System.out.println("SYM");
					continue;
				}

				if (!(hasAnalyses && (isVerb(current) || isVerbAux(current)))) {
					continue;
				}

				if (!continueVG) {
					currentVGstartedAt = i;
				}

				for (int j = 0; j < current.getStemmedOutputs().size(); j++) {
					StemmerRuleResult srr = current.getStemmedOutputs().get(j);
					boolean endWord = vgr.mustEndWords.contains(token);
					boolean analysisIsVerb = isVerb(srr);
					boolean analysisIsVerbAux = isVerbAux(srr);
					// An analysis without any suffix cannot carry a must-end suffix; the
					// size check also keeps get(0) from failing on an empty list.
					boolean endSuffix = srr.getSuffixList().size() > 0
							&& vgr.mustEndSuffixes.contains(srr.getSuffixList().get(0));
					boolean isLastWordofSentence = (i == matv.size() - 1);

					if ((endWord || endSuffix || isLastWordofSentence)
							&& (analysisIsVerb || analysisIsVerbAux)) {
						/* End VG */
						// For a single-word group, mark the word only when verb/verb_aux
						// are its only categories; otherwise it stays ambiguous.
						if (continueVG || (isOnlyVerb(current) && isVerb(current)))
							vgi[i] = vgNumber;
						vgNumber = 1;
						continueVG = false;
						break;
					}
					else if (!isLastWordofSentence && (analysisIsVerb || analysisIsVerbAux)) {
						MorphologicallyAnalyzedToken next = matv.get(i + 1);
						boolean isNextVerb = next.getAmbiguityScheme().contains("verb");
						boolean isNextVerbAux = next.getAmbiguityScheme().contains("verb_aux");

						if (!isNextVerbAux) {
							if (isNextVerb) {
								/* check suffix agreement */
								if (agree(srr, next)) {
									/* End: both words get the same number and the next word is consumed */
									vgi[i] = vgNumber;
									vgi[i + 1] = vgNumber;
									vgNumber = 1;
									i++;
									continueVG = false;
									break;
								}
								else {
									/* Abort: rewind to the start of the current group */
									i = currentVGstartedAt;
									vgNumber = 1;
									vgi[i] = 0;
									System.out.println("Going back to : " + i);
									continueVG = false;
									break;
								}
							}
							else {
								/* End */
								// Same single-word rule as above.
								if (continueVG || (isOnlyVerb(current) && isVerb(current)))
									vgi[i] = vgNumber;
								vgNumber = 1;
								continueVG = false;
								break;
							}
						}
						/* next word can be an auxiliary: check if this analysis has a suffix */
						else if (hasSuffix(srr)) {
							if (vgr.adverbialSuffix_Words.containsKey(srr.getSuffixList().get(0))
									&& vgr.adverbialSuffix_Words.get(srr.getSuffixList().get(0)).equals(next.getToken())) {
								/* Adverbial will end at next word */
								vgi[i] = vgNumber;
								vgi[i + 1] = vgNumber + 1;
								vgNumber = 1;
								i++;
								continueVG = false;
								currentVGstartedAt = i;
								break;
							}
							else if (continueVG) {
								if (vgr.hasSuffix(srr.getSuffixList().get(0))) {
									boolean foundFlag = false;
									for (int k = 0; k < next.getStemmedOutputs().size(); k++) {
										if (vgr.getNextVerbs(srr.getSuffixList().get(0)).contains(next.getStemmedOutputs().get(k).getRoot())) {
											foundFlag = true;
											break;
										}
									}
									if (!foundFlag && !vgr.adverbialSuffixes.contains(srr.getSuffixList().get(0))) {
										/* Abort VG */
										i = currentVGstartedAt;
										vgNumber = 1;
										vgi[i] = 0;
										System.out.println("Going back to : " + i);
										continueVG = false;
										break;
									}
								}
								// otherwise fall through to "continue VG" below
							}
							else { // it's a main verb
								/* Start VG */
								vgi[i] = vgNumber;
								vgNumber++;
								currentVGstartedAt = i;
								continueVG = true;
								break;
							}
						}
						else { // No suffix
							if (continueVG) { // It's not the main verb.
								boolean foundFlag = false;
								if (vgr.hasRootVerb(srr.getRoot())) {
									for (int k = 0; k < next.getStemmedOutputs().size(); k++) {
										if (vgr.getNextAuxVerbs(srr.getRoot()).contains(next.getStemmedOutputs().get(k).getRoot())) {
											foundFlag = true;
											break;
										}
									}
								}
								else {
									for (int k = 0; k < next.getStemmedOutputs().size(); k++) {
										if (vgr.getNextVerbs("null").contains(next.getStemmedOutputs().get(k).getRoot())) {
											foundFlag = true;
											break;
										}
									}
								}
								if (!foundFlag) {
									if (vgr.adverbialWords.contains(srr.getRoot())) {
										/* End VG */
										// continueVG is known to be true here, so the word is always marked.
										vgi[i] = vgNumber;
										vgNumber = 1;
										continueVG = false;
										break;
									}
									else {
										/* Abort VG */
										i = currentVGstartedAt;
										vgNumber = 1;
										vgi[i] = 0;
										System.out.println("Going back to : " + i);
										continueVG = false;
										break;
									}
								}
								// otherwise fall through to "continue VG" below
							}
							else { // it's a main verb
								/* Start VG */
								vgi[i] = vgNumber;
								vgNumber++;
								currentVGstartedAt = i;
								continueVG = true;
								break;
							}
						}

						/* Neither ended nor aborted: continue the VG with this word */
						vgi[i] = vgNumber;
						vgNumber++;
						break;
					}
					// This analysis is neither verb nor verb_aux: try the next one.
				}
			}

			for (int i = 0; i < vgi.length; i++) {
				System.out.print(matv.get(i).getToken() + "- " + vgi[i] + "\t");
			}
			return vgi;
		}

		/**
		 * Tells whether the analyses of a token leave room for nothing but verb readings:
		 * exactly one analysis when the ambiguity scheme contains only one of
		 * "verb" / "verb_aux", or exactly two analyses when it contains both.
		 *
		 * @param mat the analyzed token
		 * @return true when the number of analyses matches the verb readings found
		 */
		boolean isOnlyVerb(MorphologicallyAnalyzedToken mat){
			boolean verb = isVerb(mat);
			boolean verbAux = isVerbAux(mat);
			if (!verb && !verbAux) {
				return false;
			}
			int expectedAnalyses = (verb && verbAux) ? 2 : 1;
			return mat.getStemmedOutputs().size() == expectedAnalyses;
		}

		/**
		 * @param srr one stemmer analysis
		 * @return true when the analysis has at least one suffix and the first one is not empty
		 */
		boolean hasSuffix(StemmerRuleResult srr){
			return srr.getSuffixList().size() > 0 && !(srr.getSuffixList().get(0).isEmpty());
		}

		/** @return true when the category of this analysis is exactly "verb" */
		boolean isVerb(StemmerRuleResult srr){
			return srr.getCategory().equals("verb");
		}

		/** @return true when the token's ambiguity scheme contains "verb" */
		boolean isVerb(MorphologicallyAnalyzedToken mat){
			return mat.getAmbiguityScheme().contains("verb");
		}

		/** @return true when the token's ambiguity scheme contains "verb_aux" */
		boolean isVerbAux(MorphologicallyAnalyzedToken mat){
			return mat.getAmbiguityScheme().contains("verb_aux");
		}

		/** @return true when the category of this analysis is exactly "verb_aux" */
		boolean isVerbAux(StemmerRuleResult srr){
			return srr.getCategory().equals("verb_aux");
		}

		/**
		 * Checks suffix agreement between one analysis and a following token.
		 *
		 * @param srr analysis of the current word
		 * @param mat the following token
		 * @return true when the first suffix of {@code srr} occurs in the suffix list of
		 *         any analysis of {@code mat}; false when {@code srr} has no suffix at
		 *         all, since there is nothing to agree on
		 */
		boolean agree(StemmerRuleResult srr, MorphologicallyAnalyzedToken mat){
			if (srr.getSuffixList().size() == 0) {
				// Without this guard get(0) below fails on an empty suffix list.
				return false;
			}
			for (int i = 0; i < mat.getStemmedOutputs().size(); i++) {
				if (mat.getStemmedOutputs().get(i).getSuffixList().contains(srr.getSuffixList().get(0))) {
					return true;
				}
			}
			return false;
		}

		/**
		 * Small driver: stems and analyzes one hard-coded sentence and prints the
		 * verb group marking produced by {@link #identifyVerbGroups1(Vector)}.
		 *
		 * @param args {@code args[0]} is handed to {@code ConfigReader.read}
		 */
		public static void main(String args[]) {
			if (args.length < 1) {
				System.err.println("Usage: VerbGroup4 <config>");
				System.exit(1);
			}
			ConfigReader.read(args[0]);
			Stemmer stemmer = new Stemmer();
			VerbGroup4 vb = new VerbGroup4();

			String sentence = "शरीर कांप उठा था";
			// Other sentences that were used while trying out the rules:
			// "मुझे दौड़कर पकड़ लिया"
			// "देखने- सुनने में भी बुरी न थी"
			// "ज्ञानेश्वर जी की विश्वविख्यात पुस्तक ज्ञानेश्वरी को लिपिबद्ध किया था"
			// "जो भी देखा सुना उसे संजोए"
			// "संबंधों में सुधार की दिशा में इसे एक महत्वपूर्ण पड़ाव माना जा रहा है"
			// "माना जा रहा है कि चीनी कम्युनिस्ट पार्टी के एक उच्च अधिकारी उत्तर कोरिया के नेता किम जॉंग इल से मिलने जा रहे हैं"
			// "जब कहीं न पाया , तो छत पर आई और उसे देखते ही हँसकर बोली तुम यहाँ आकर छिपी बैठी हो और मैं तुम्हें ढूँढ़ती फिरती हूं"
			// "राम खाना खाता है "
			// "राम खाने वाला था"
			// "राम खाना खाता और चला जाता है"
			// "राम खाते हुए भागा "
			// "राम यह काम कर रहा होगा"
			// "राम को खाते रहना पड़ता है"
			// "राम काम कर चुका देगा"
			// "राम कर चुका होगा"
			// "राम जाना जाता है"
			// "राम कर चुका देगा"
			// "आप खाना खाकर ही जाना"
			// "राम करता रह सकता था"

			String tokens[] = sentence.split(" ");
			Vector<String> tokenList = new Vector<String>(Arrays.asList(tokens));

			// The reader instance itself is not used here.
			// NOTE(review): presumably its constructor loads the rules the analyzer
			// relies on; confirm before removing this line.
			new MorphologicalAnalyzerRuleReader();
			MorphologicalAnalyzer ma = new MorphologicalAnalyzer();

			Vector<StemmedToken> stemmedTokens = stemmer.stem(tokenList);
			Vector<MorphologicallyAnalyzedToken> maTokens = ma.analyze(stemmedTokens);
			// The marking is printed by identifyVerbGroups1 itself.
			vb.identifyVerbGroups1(maTokens);
		}
	}

