#######################################################################
#
#Functionality: 
#extract the dependent word and its POS 
#from output of minipar parser
#
#Usage: 
#work together with get_features.pl
#
#######################################################################

# Namespace of this module; getDependency is exported to callers by default.
package dependency;

use strict;

# Inherit import() from Exporter.  The @EXPORT assignment is kept as the last
# top-level runtime statement on purpose: no explicit trailing true value is
# visible in this file, so this assignment appears to supply the true value
# that "require" needs.
require Exporter;
our @ISA    = ('Exporter');
our @EXPORT = ('getDependency');


# getDependency($senno, $sen_index, $token, $token_index, $block)
#
# For the two mentions (M1 and M2) of one sentence, find the word that governs
# each mention in the parser output and return that word with its POS tag.
#
# Parameters:
#   $senno       - accepted for interface compatibility; not used here.
#   $sen_index   - index into @$block of the sentence to inspect.
#   $token       - array ref holding the tokens of the sentence (0-based).
#   $token_index - array ref [m1_left, m1_right, m2_left, m2_right] with the
#                  0-based token positions that delimit the two mentions.
#   $block       - array ref of per-sentence parser output.  Each entry is a
#                  string of newline-separated lines; each line consists of
#                  tab-separated fields: node id, "(" followed by the word
#                  form, a whitespace-separated category string whose last
#                  item is taken as the POS, and the id of the head node.
#
# Returns a list:
#   [0], [1] - lower-cased head word and POS for M1
#   [2], [3] - lower-cased head word and POS for M2
# A pair is left undefined when no token of the mention has a head outside
# the mention.  The list is empty when the sentence has no parse or when the
# mention boundaries in the parse do not match the tokens.
#
# Side effect: on a boundary mismatch, prints "nomatch=N" to the currently
# selected output handle, N being the running number of mismatches so far.
sub getDependency {
    my ($senno, $sen_index, $token, $token_index, $block) = @_;
    my @result = ();

    # Running total of boundary mismatches.  It is a package variable so that
    # it survives between calls; a lexical declared here would restart on
    # every call and the message would always read "nomatch=1".
    our $count_nomatch;

    my ($pt1_left, $pt1_right, $pt2_left, $pt2_right)
        = @{$token_index}[0 .. 3];

    # Nothing to extract when this sentence has no parse tree.
    return @result unless $block->[$sen_index] =~ /\S/;

    # Index the parse lines of this sentence by node id (first field).
    my %tree;
    for my $line (split /\n+/, $block->[$sen_index]) {
        my $node_id = (split /\t+/, $line)[0];
        next unless defined $node_id;
        $tree{$node_id} = $line;
    }

    # The parse is only usable if the word forms at all four mention
    # boundaries agree with the annotated tokens.
    my $boundaries_match =
           (_parsed_form(\%tree, $pt1_left)  eq $token->[$pt1_left])
        && (_parsed_form(\%tree, $pt1_right) eq $token->[$pt1_right])
        && (_parsed_form(\%tree, $pt2_left)  eq $token->[$pt2_left])
        && (_parsed_form(\%tree, $pt2_right) eq $token->[$pt2_right]);

    unless ($boundaries_match) {
        $count_nomatch++;
        print "nomatch=$count_nomatch\n";
        return @result;
    }

    # Head of M1 fills slots 0 and 1, head of M2 fills slots 2 and 3.
    my @m1_head = _external_head(\%tree, $pt1_left, $pt1_right);
    @result[0, 1] = @m1_head if @m1_head;

    my @m2_head = _external_head(\%tree, $pt2_left, $pt2_right);
    @result[2, 3] = @m2_head if @m2_head;

    return @result;
}

# Word form stored in the parse for the token at 0-based position $position
# (node ids are the token position plus one), without the leading "(".
sub _parsed_form {
    my ($tree, $position) = @_;

    my @fields = split /\t+/, $tree->{ $position + 1 };
    my $form   = $fields[1];
    $form =~ s/^\(//;

    return $form;
}

# Scan the tokens of one mention from left to right and return
# (lower-cased head word, POS) for the first token whose head node lies
# outside the mention.  Returns an empty list when there is no such token.
sub _external_head {
    my ($tree, $left, $right) = @_;

    # Node ids covered by the mention.
    my ($first_id, $last_id) = ($left + 1, $right + 1);

    for my $node_id ($first_id .. $last_id) {
        my @fields  = split /\t+/, $tree->{$node_id};
        my $head_id = $fields[3];

        # Skip nodes whose head field carries no digit at all.
        next unless defined $head_id && $head_id =~ /\d/;

        # Heads inside the mention are not what we are looking for.
        next unless $head_id < $first_id || $head_id > $last_id;

        my @head = split /\t+/, $tree->{$head_id};
        my $form = lc $head[1];
        $form =~ s/^\(//;

        # The POS is the last whitespace-separated item of the third field.
        my @category = split /\s+/, $head[2];
        my $pos = @category ? $category[-1] : undef;

        return ($form, $pos);
    }

    return;
}




