#include <algorithm>
#include <cmath>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <numeric>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;


// Lookup table consulted by conff(): 31 rows x 10 columns, addressed as
// normal[row][col] where row*10 + col == trunc(|z| * 100).
// NOTE(review): the values presumably are standard-normal areas between 0 and
// z (row = z to one decimal, col = second decimal) - confirm against
// knowledgebase/normal.tsv.
std::vector<std::vector<double> > normal;

// Loads the lookup table from "knowledgebase/normal.tsv".
//
// The table is always reset to exactly 31 x 10 cells, so conff() can index it
// safely and calling init() more than once does not append extra rows. Cells
// that cannot be read (missing file, short file, malformed number) stay 0 and
// a warning is written to stderr instead of storing indeterminate values.
void init(){
	const std::size_t kRows = 31;
	const std::size_t kCols = 10;
	normal.assign(kRows, std::vector<double>(kCols, 0.0));

	std::ifstream normal_inp("knowledgebase/normal.tsv", std::ios::in);
	if(!normal_inp){
		std::cerr<<"warning: cannot open knowledgebase/normal.tsv; lookup table left at zero"<<std::endl;
		return;
	}
	for(std::size_t row = 0; row < kRows; ++row){
		for(std::size_t col = 0; col < kCols; ++col){
			double cell = 0.0;
			if(!(normal_inp >> cell)){
				std::cerr<<"warning: knowledgebase/normal.tsv holds fewer than "
				         <<kRows*kCols<<" numbers; remaining cells left at zero"<<std::endl;
				return;
			}
			normal[row][col] = cell;
		}
	}
}

// Returns true when v1 and v2 share at least one identical string, false
// otherwise (including when either vector is empty).
//
// Both arguments are taken by const reference: the function only reads them,
// and it is called from main's innermost loop, where copying two vectors of
// strings per call was pure overhead.
bool exist(const std::vector<std::string>& v1, const std::vector<std::string>& v2){
	return std::find_first_of(v1.begin(), v1.end(), v2.begin(), v2.end()) != v1.end();
}

// Scores how plausible it is that `val` belongs to a distribution with the
// given `mean` and standard deviation `sd`.
//
// val  - observed number.
// mean - reference mean.
// sd   - reference standard deviation. 0 means "all samples identical";
//        a negative value (Country::assgn stores -1 for "no data") or NaN
//        means there is nothing to compare against and yields 0.
// as   - when true the table-based score r is replaced by sqrt(100*r), which
//        raises any score below 100.
//
// Returns 100 for an exact match against a zero-spread reference, 0 when
// |z| exceeds 3.09 (the last table cell) or the inputs are unusable, and
// otherwise (1 - 2*normal[row][col]) * 100, optionally boosted, divided by sd.
// NOTE(review): presumably `normal` holds areas between 0 and z, making the
// score a two-tailed percentage - confirm against the table file.
double conff(double val, double mean, double sd,bool as){
	if(sd == 0 && mean == val) return 100;
	// A negative spread is the "no data" sentinel; it used to produce negative
	// confidences that shrank the caller's running sum.
	if(sd < 0) return 0;
	if(sd == 0) sd = 0.0000001;

	const double z = std::fabs((val - mean) / sd);
	// Written as !(z <= ...) so that NaN is rejected too: converting NaN to
	// int below would be undefined behaviour.
	if(!(z <= 3.09)) return 0;

	const int hundredths = static_cast<int>(z * 100);
	const std::size_t row = static_cast<std::size_t>(hundredths / 10);
	const std::size_t col = static_cast<std::size_t>(hundredths % 10);
	// Guard against a table that was never loaded or is smaller than expected.
	if(row >= normal.size() || col >= normal[row].size()) return 0;

	double ret = (1 - 2 * normal[row][col]) * 100;
	if(as) ret = std::sqrt(100 * ret);
	return ret / sd;
}

// Per-country statistics over 11 attribute slots.
//
// attr[i] - samples collected for attribute i, in insertion order.
// avg[i]  - weighted mean of attr[i], or -1 when attr[i] is empty.
// st[i]   - weighted standard deviation of attr[i], or -1 when attr[i] is empty.
class Country{
public:
	std::vector<std::vector<double> > attr;
	std::vector<double> avg;
	std::vector<double> st;

	// Creates the 11 empty attribute slots with zeroed statistics.
	Country() : attr(11), avg(11), st(11) {}

	// Recomputes avg and st from attr.
	//
	// Sample k of a series with n samples gets weight n/5 + k (integer
	// division), so later samples count more. For n < 5 the first sample
	// therefore has weight 0. When every weight is 0 (a single sample) the
	// series falls back to uniform weights; previously that case divided by
	// zero and stored NaN. Sums are built in locals, so calling assgn() again
	// gives the same result instead of accumulating on top of the old one.
	void assgn(){
		for(std::size_t i = 0; i < attr.size(); ++i){
			const std::vector<double>& samples = attr[i];
			if(samples.empty()){
				avg[i] = -1;
				st[i] = -1;
				continue;
			}

			const std::size_t base = samples.size() / 5;
			std::vector<double> wt(samples.size());
			double wt_sum = 0;
			for(std::size_t k = 0; k < wt.size(); ++k){
				wt[k] = static_cast<double>(base + k);
				wt_sum += wt[k];
			}
			if(wt_sum == 0){
				wt.assign(wt.size(), 1.0);
				wt_sum = static_cast<double>(wt.size());
			}

			double mean = 0;
			for(std::size_t j = 0; j < samples.size(); ++j){
				mean += samples[j] * wt[j];
			}
			mean /= wt_sum;

			double variance = 0;
			for(std::size_t j = 0; j < samples.size(); ++j){
				const double diff = samples[j] - mean;
				variance += wt[j] * diff * diff;
			}
			variance /= wt_sum;

			avg[i] = mean;
			st[i] = std::sqrt(variance);
		}
	}

	// Writes one "avg     st" line per attribute slot to stdout.
	void print(){
		for(std::size_t i = 0; i < avg.size(); ++i){
			std::cout<<avg[i]<<"     "<<st[i]<<std::endl;
		}
	}
};

// Splits `s` on whitespace and returns the tokens.
//
// A token whose last character is not an ASCII letter or digit loses that one
// character; a token consisting of only such a character is dropped. Each
// time the removed character is '.', `y` is set to the number of tokens
// stored so far, so after the call it refers to the last such position. `y`
// is left untouched when no token ends in '.'.
std::vector<std::string> read(std::string s,int &y){
	std::vector<std::string> tokens;
	std::istringstream stream(s);
	for(std::string token; stream >> token; ){
		const char last = token[token.size() - 1];
		const bool ends_alnum = (last >= '0' && last <= '9')
		                     || (last >= 'A' && last <= 'Z')
		                     || (last >= 'a' && last <= 'z');
		if(ends_alnum){
			tokens.push_back(token);
			continue;
		}
		if(last == '.'){
			y = tokens.size();
		}
		if(token.size() > 1){
			token.erase(token.size() - 1);
			tokens.push_back(token);
		}
	}
	return tokens;
}

// Returns true when `s` contains at least one ' ' character.
bool has_space(std::string &s){
	return s.find(' ') != std::string::npos;
}

// Returns true when `s` can safely be handed to std::stod.
//
// As before, a token starting with an ASCII letter is never a number (this
// also rejects "inf" and "nan"). In addition the token must be non-empty and
// std::stod must accept it: previously tokens such as "(abc" or "-" were
// reported as numbers and the caller's std::stod call then threw. Tokens with
// a numeric prefix ("5km") are still accepted, matching what std::stod parses.
bool is_double(std::string &s){
	if(s.empty()) return false;
	const char first = s[0];
	if((first >= 'a' && first <= 'z') || (first >= 'A' && first <= 'Z')) return false;
	try{
		(void)std::stod(s);
		return true;
	}
	catch(const std::invalid_argument&){
		return false;   // no leading number at all
	}
	catch(const std::out_of_range&){
		return false;   // magnitude not representable as double
	}
}

int main(){
	init();
	ofstream out("output",ios::out);
	map<string,int> ind;
	map<int,string> rev_ind;

	ind["AG.LND.TOTL.K2"]=0;
	ind["BN.KLT.DINV.CD"]=1;
	ind["BX.GSR.MRCH.CD"]=2;
	ind["EG.ELC.PROD.KH"]=3;
	ind["EN.ATM.CO2E.KT"]=4;
	ind["EP.PMP.DESL.CD"]=5;
	ind["FP.CPI.TOTL.ZG"]=6;
	ind["IT.NET.USER.P2"]=7;
	ind["NY.GDP.MKTP.CD"]=8;
	ind["SP.DYN.LE00.IN"]=9;
	ind["SP.POP.TOTL"]=10;

	
	for (map<string,int>::iterator i = ind.begin(); i != ind.end(); ++i)
    rev_ind[i->second] = i->first;
	
	vector< vector <string> > vv(11);
	
	vector<string> units(11),answ(11);
	
	answ[0]  = "Land Area"             ;
	answ[1]  = "FDI"                   ;
	answ[2]  = "Goods Export"          ;
	answ[3]  = "Electricity production";
	answ[4]  = "CO2 Emission"          ;
	answ[5]  = "Diesel Fuel Price"     ;
	answ[6]  = "Inflation"             ;
	answ[7]  = "Internet Users"        ;
	answ[8]  = "GDP"                   ;
	answ[9]  = "Life Expectency"       ;
	answ[10] = "Population"            ;
	                                   ;
	units[0]  = "sq. km"               ;
	units[1]  = "USD"                  ;
	units[2]  = "USD"                  ;
	units[3]  = "kWh"                  ;
	units[4]  = "kiloton"              ;
	units[5]  = "USD per Litre"        ;
	units[6]  = "percent"              ;
	units[7]  = "per 100 people"       ;
	units[8]  = "USD"                  ;
	units[9]  = "years"                ;
	units[10] = ""                     ;
	
	
	vv[0] = { "land","area","Land","Area","sq." ,"km","square","kilometre","Kilometre","Kilometer","kilometer" } ;
	vv[1] = { "foreign","direct","investment","Foreign","Direct","Investment","net","BoP","dollar" };
	vv[2] = { "goods","export","Goods","Exports","BoP","dollar" } ;
	vv[3] = { "electricity","production","Electricity","Production","kWh","kilowatt-hour" };
	vv[4] = { "co2","emissions","CO2","Emissions","kt","kiloton" };
	vv[5] = { "diesel","fuel","price","Pump","price","diesel","fuel","liter","dollar"} ;
	vv[6] = { "inflation","Inflation","consumer"};
	vv[7] = { "internet","users","Internet","Users"};		
	vv[8] = { "gross","domestic","GDP","dollar"};
	vv[9] = { "life","expectancy","birth","Life","Expectancy","Birth"};
	vv[10]= { "population","Population"};

	


	map<string,string> cnt;   //s2->s1
	map<string,string> cnt12;
	map<string,int> cnt_pos;  //s1->pos in info vector.
	string s1,s2,s3;
	ifstream cnt_in("knowledgebase/countries_id_map.txt",ios::in);
	int sz = 0;
	while(getline(cnt_in,s1)){
		istringstream iss(s1);
		string word;
		iss>>word;
		s3 = word;
		iss>>word;
		s2 = s2 + word;
		while(iss>>word){
			s2 = s2 + " " + word;
		}
		s1 = s3;
		//cout<<s1<<  "   "<<s2<<endl;
		//if(s2=="Bulgaria"){ cout<<s1<<endl; return 0;}
		//s1 = s3;
		if(cnt.find(s2) == cnt.end()){
			cnt[s2] = s1;
			cnt12[s1] = s2;
		}
		else if(has_space(s2)){
			cnt[s2] = s1;
		}
		cnt_pos.insert(pair<string,int>(s1,sz));
		sz++;
		s2 = "";
	}
	//return 0;
	vector < vector <string> > about(11);
	

	vector<Country> info(cnt.size());
	ifstream train_in("knowledgebase/kb-facts-train_SI.tsv",ios::in);
	double value;
	while(train_in>>s1>>value>>s3){
		//if(s1 == "/m/015qh") cout<<ind[s3]<<"  "<<value<<endl;
		info[cnt_pos[s1]].attr[ind[s3]].push_back(value);
	}

	for(int i=0;i<info.size();i++){
		info[i].assgn();
	}
	//cout<<"AA"<<cnt_pos["/m/015qh"]<<"  "<<cnt["Bulgaria"]<<endl;
	ifstream in("sentences.tsv",ios::in);
	string t;
	int iii=0;
	//cout<<endl<<endl;
	//info[cnt_pos[cnt["India"]]].print();
	while(getline(in,t)){
		iii++;
		int fs;
		
		vector<string> v=read(t,fs);
		string id=v[0];
		vector<string> curr_cnt;
		vector<double> curr_val;
		int j = fs+1;
		for(;j<v.size();j++){
			if(!is_double(v[j])) break;
			curr_val.push_back(stod(v[j]));
		}
		for(;j<v.size();j++){
			curr_cnt.push_back(v[j]);
		}
		
		
		for(int x=0;x<curr_val.size();x++){
			int max_cnt=-1,max_atrr=0;double conf_sum=0,conf_max=0;
			for(int k=0;k<curr_cnt.size();k++){
			
			if(cnt.find(curr_cnt[k]) == cnt.end()) continue;
			Country ncnt = info[cnt_pos[cnt[curr_cnt[k]]]];
						
				
				for(int y=0;y<ncnt.avg.size();y++){
					bool ww = exist(v,vv[y]);
					double zzz=conff(curr_val[x],ncnt.avg[y],ncnt.st[y],ww);
					conf_sum+=zzz;
					if(zzz > conf_max)
					{
						max_atrr=y;
						max_cnt=k;
						conf_max=zzz;
					} 
				}
			}
		
		if(max_cnt != -1){
			if(conf_max*conf_max/conf_sum*info[cnt_pos[cnt[curr_cnt[max_cnt]]]].st[max_atrr] > 10){
			out<<id<<"   "<<curr_cnt[max_cnt]<<"  "<<answ[max_atrr]<<" = "<<curr_val[x]<<" "<<
		units[max_atrr]<<"     with  "<<conf_max*conf_max/conf_sum*info[cnt_pos[cnt[curr_cnt[max_cnt]]]].st[max_atrr]<<"% confidence."<<endl;
		}}
		
		
	}}
}
