#!/usr/bin/perl -w

# ---------------------------------------------------------------------------
# Data loading.
#
# Reads the three n-gram tables and the part-of-speech lexicons from the
# current working directory into memory, one array element per input line
# (line endings are kept).  The arrays are package variables because the
# generation code further down refers to them by name.
# ---------------------------------------------------------------------------

# Return every line of the file at $path, line endings included.
# Dies with "CANNOT OPEN '<label>': <reason>" when the file cannot be opened.
sub read_lines {
    my ( $path, $label ) = @_;
    open( my $fh, '<', $path ) or die "CANNOT OPEN '$label': $!\n";
    my @lines = <$fh>;
    close($fh);
    return @lines;
}

# N-gram tables.
@g1lines = read_lines( "bemails.3-1sortp", "1-grams" );
@g2lines = read_lines( "bemails.3-2sortp", "2-grams" );
@g3lines = read_lines( "bemails.3-3sortp", "3-grams" );

# Part-of-speech lexicons.  Each failure message now names the file that
# actually could not be opened (previously every one of them said 'lexN').
@N   = read_lines( "lexN", "lexN" );
@V   = read_lines( "lexV", "lexV" );
@ADJ = read_lines( "lexJ", "lexJ" );
@ADV = read_lines( "lexY", "lexY" );
@DET = read_lines( "lexD", "lexD" );
@P   = read_lines( "lexP", "lexP" );

# The lexicons below are not used by this script, so they are no longer
# opened: a missing unused file should not abort the run.
#@MOD = read_lines( "lexM", "lexM" );
#@NUM = read_lines( "lexU", "lexU" );
#@QAN = read_lines( "lexQ", "lexQ" );
#@COM = read_lines( "lexC", "lexC" );

# ---------------------------------------------------------------------------
# Sentence generation.
#
# Each command-line argument names a part of speech (ADJ, DET, ADV, V, N, P).
# For every such argument one word is printed, followed by a space.  The word
# is taken from the first entry of the matching n-gram table that continues
# the words printed so far and whose final word belongs to the requested
# lexicon:
#   - first word:  unigram table, no context;
#   - second word: bigram table, context is the previous word;
#   - later words: trigram table, context is the previous two words.
# When no entry qualifies, a random word from the lexicon is used instead.
# An argument containing "-h" prints the usage text; any other unrecognised
# argument is ignored.
# ---------------------------------------------------------------------------
use strict;    # in effect from here to the end of the file

# Tables loaded earlier in this file; declared so they are legal under strict.
our ( @g1lines, @g2lines, @g3lines );
our ( @ADJ, @DET, @ADV, @V, @N, @P );

# Return the final word of the first line in @$ngram_lines whose leading
# words equal @$context and whose final word is a key of %$is_member.
# Returns nothing (undef in scalar context) when no line qualifies.
#
# Each line is split on whitespace.  Field 0 is ignored (the original
# comments label it "pr"), fields 1..k must equal the k context words, and
# field k+1 is the candidate word.
sub first_ngram_match {
    my ( $ngram_lines, $context, $is_member ) = @_;
    my $candidate_at = scalar(@$context) + 1;

  LINE:
    foreach my $line (@$ngram_lines) {
        my @fields = split " ", $line;
        next LINE if $#fields < $candidate_at;    # too short to hold a candidate
        foreach my $i ( 0 .. $#$context ) {
            next LINE if $fields[ $i + 1 ] ne $context->[$i];
        }
        return $fields[$candidate_at]
          if $is_member->{ $fields[$candidate_at] };
    }
    return;
}

# Part-of-speech dispatch, tried in this order.  The patterns are unanchored
# substring matches, so ADV has to be tested before V.  Each lexicon is
# copied with line endings removed and indexed for constant-time membership
# tests.
my @pos_table;
foreach my $entry (
    [ qr/ADJ/, \@ADJ ],
    [ qr/DET/, \@DET ],
    [ qr/ADV/, \@ADV ],
    [ qr/V/,   \@V ],
    [ qr/N/,   \@N ],
    [ qr/P/,   \@P ],
  )
{
    my ( $pattern, $lexicon ) = @$entry;
    my @words = @$lexicon;
    chomp @words;
    my %is_member = map { ( $_ => 1 ) } @words;
    push @pos_table,
      { pattern => $pattern, words => \@words, is_member => \%is_member };
}

# Indexed by the number of context words available (0, 1 or 2).
my @ngram_tables = ( \@g1lines, \@g2lines, \@g3lines );

# The most recently printed words, oldest first; never more than two.
my @history;

print "\t";
ARG:
foreach my $arg (@ARGV) {
    if ( $arg =~ /-h/ ) {
        print "usage: \n";
        # print "\t genbo -r \n";
        print "\t genbo POS (POS)* \n";
        print "\t e.g. POS = {N,V,ADJ,ADV,P,DET} \n";
        next ARG;
    }

    my ($pos) = grep { $arg =~ $_->{pattern} } @pos_table;
    next ARG if !defined $pos;

    my $word = first_ngram_match( $ngram_tables[ scalar @history ],
        \@history, $pos->{is_member} );

    if ( !defined $word ) {
        # No n-gram continues the sentence: fall back to a random lexicon
        # entry, or to the empty string when the lexicon itself is empty.
        my $words = $pos->{words};
        $word = @$words ? $words->[ int( rand( scalar @$words ) ) ] : "";
    }

    print "$word ";

    # Slide the context window: the oldest word drops out once two are held.
    push @history, $word;
    shift @history if @history > 2;
}
print "\n";

