# Opens the link grammar executable as a subprocess, and handles submitting 
# input to and reading responses from the link process.

# Wraps a link grammar parser subprocess: submits sentences to it, reads back
# the bracketed constituent parse ("[S [NP ... NP] [VP ... VP] . S]") and
# extracts pieces of that parse (subject, verb, object, ...) with regexes.
#
# Most methods echo debugging output to stdout with Kernel#p.
#
# NOTE: this class defines its own #puts and #gets. Inside the class a bare
# `puts`/`gets` therefore talks to the parser, not to the terminal.
class Link

   # Executable launched when none is given. The name is platform specific:
   # "link41b" is the Windows build, the Linux build is called "parse".
   DEFAULT_EXECUTABLE = "link41b"

   # Opening constituent tags removed by #process, in the original order.
   OPENING_TAGS = [
      /\[WHNP /, /\[NP /, /\[PP /, /\[SBAR /, /\[ADVP /, /\[VP /, /\[S /,
      /\[ADJP( )?/, /\[PRT\s?/
   ].freeze

   # Closing constituent tags removed by #process. Order matters: a longer tag
   # must be removed before a shorter tag that is its suffix (WHNP] before
   # NP], ADVP] before VP]).
   CLOSING_TAGS = [
      /WHNP\]( ){0,1}/, /NP\]( ){0,1}/, /PP\]( ){0,1}/, /SBAR\]( ){0,1}/,
      /ADVP\]( ){0,1}/, /VP\]( ){0,1}/, /S\]( ){0,1}/, /ADJP\]( ){0,1}/,
      /PRT\]\s?/
   ].freeze

   # Patterns tried in order by #getV1, paired with the label used in its
   # debug output. Group 1 of each pattern is the verb.
   # NOTE(review): the bracket expression in the first pattern is a character
   # class, so it only ever captures a single character; a group was probably
   # intended. Left unchanged because the intended grammar is unknown.
   V1_PATTERNS = [
      [/\[NP .* NP\] \[VP ([^(?:\[(?:NP|ADJP|PP)?)]) \[(?:(PP|NP|ADJP)?) VP\] \. S\]/, "first"],
      # Not working, need to get verbs separated by conjunctions and such.
      [/\[NP .* NP\](?:[^\[VP])*\[VP (.*) (?:.*\[(?:(PP|NP|ADJP)?))/, "second"],
      [/\[NP .* NP\] \[VP (.*) VP\]/, "third"]
   ].freeze

   # Failure messages produced by #getSubject, #getV1 and #getObject.
   EXTRACTION_FAILURE =
      /I can't|\AI'm not sure what the subject in |\AI don't know what the verb is in /

   # Starts the parser subprocess, echoes its start-up output up to the affix
   # line, then switches on bracketed constituent output.
   #
   # executable - command used to launch the parser (see DEFAULT_EXECUTABLE).
   #
   # Raises IOError if the subprocess ends before the affix line is seen
   # (previously this looped forever on nil).
   def initialize(executable = DEFAULT_EXECUTABLE)
      @lg = IO.popen(executable, "w+")

      loop do
         s = @lg.gets
         raise IOError, "#{executable} exited before finishing start-up" if s.nil?
         p s
         break if s =~ /.*4.0.affix/
      end

      @lg.puts("!constituents=2")
      p @lg.gets
   end

   # Validates input and returns the raw parser response for it, or a
   # human-readable message when the input is rejected or parsing raises.
   def getResponse(input="")
      p 'INPUT==' + input
      # check for words that are too long
      if input =~ /(\w){20,20}/
         # $stdout explicitly: a bare `puts` here would call Link#puts and
         # send this message to the parser as a sentence.
         $stdout.puts "There is a word that is too long."
         return "There is a word that is too long."
      end
      return "The input exceeded maximum length." if input.size > 300
      return "Sorry I didn't get that..." if input == "\"\""

      begin
         puts(input.to_s)
      rescue => error
         # Interpolate the message: String + Exception raises TypeError.
         "Sorry I get an error when parsing that sentence: #{error.message}"
      end
   end

   # Get the first NP in questions of the form: is NP1 NP2?
   # Returns the NP text with tags stripped, or an "I don't know ..." message.
   def getNP1(input = "")
      p "[link.getNP1]: input==#{input}."
      parse = puts(input)
      p "[link.getNP1]: parse==#{parse}."
      #if parse =~ /\[NP (.*) NP\]( not | )\[(NP|ADJP)/
      #if parse =~ /\[NP (.*) NP\] \[(NP|ADJP|VP)?/
      found = /\[NP (.*) NP\] \[(NP|ADJP)/.match(parse)
      return process(found[1]) if found

      "I don't know what the subject in " + input + " is."
   end

   # Get the subject, which should be the first NP.
   # Returns the NP text with tags stripped, or an "I'm not sure ..." message.
   def getSubject(input="")
      p "[link.getSubject]: input==#{input}"
      parse = puts(input)
      p "[link.getSubject]: parse==#{parse}"
      found = /\[NP (.*?) NP\] \[(NP|ADJP|VP)?/.match(parse)
      return process(found[1].to_s) if found

      "I'm not sure what the subject in " + input + " is."
   end

   # Get the second NP in questions of the form: is NP1 NP2?.
   # Returns the NP/ADJP text with tags stripped, or '' when nothing matches.
   def getNP2(input = "")
      parse = puts(input)
      p "[link.getNP2]: parse==#{parse}."
      #if parse =~ /\[NP .* NP\] (not |)\[(?:NP|ADJP) (.*) (?:NP|ADJP)\]/
      #if parse =~ /\[NP .* NP\].*\[VP.*\[(?:NP|ADJP) (.*) (?:NP|ADJP)\].*VP\]/
      found = /\[NP .* NP\] \[(?:NP|ADJP) (.*) (?:NP|ADJP)\]/.match(parse)
      return '' unless found

      # The pattern has a single capture group, so there is nothing to append.
      process(found[1].to_s)
   end

   # Try to get the object of a declarative sentence.
   # Returns the object text, or an "I can't find the object ..." message.
   def getObject(input = "")
      parse = puts(input)
      found = /\[NP .* NP\] \[VP.*\[NP (.*) NP\].*VP\]/.match(parse)
      return "I can't find the object in " + input.to_s unless found

      r = process(found[1].to_s)
      p "[link.getObject] returning: " + r
      r
   end

   # Try to get the main verb of the sentence.
   # TODO: fix problems with conjunctions and adverbial phrases...
   # Returns the verb text, or an "I don't know what the verb is ..." message.
   def getV1(input = "")
      parse = puts(input)
      p "[link.getV1]: parse==#{parse}."

      V1_PATTERNS.each do |pattern, ordinal|
         found = pattern.match(parse)
         next unless found

         p "[Link.getV1]: #{ordinal} match."
         r = process(found[1].to_s)
         p "[link.getV1] returning: #{r}"
         return r.to_s
      end

      "I don't know what the verb is in " + input.to_s
   end

   # Returns "subject; verb; object" for input. A part that could not be
   # extracted is left empty instead of carrying its failure message.
   def getSVO(input)
      subj = blank_if_failure(getSubject(input))
      verb = blank_if_failure(getV1(input))
      obj = blank_if_failure(getObject(input))
      r = subj + "; " + verb + "; " + obj
      p '[Link.getSVO]: returning==' + r
      r
   end

   # Strip r of constituent tags, a trailing period and surrounding
   # whitespace. Modifies r in place and returns it.
   def process(r)
      (OPENING_TAGS + CLOSING_TAGS).each { |tag| r.gsub!(tag, "") }
      r.gsub!(/\.\s*$/, "")
      r.strip!
      p "[link.process]: r==#{r}."
      r
   end

   # Reads parser output line by line, accumulating it in @fullp, until a
   # line holding a bracketed "[S ... ]" parse has been read; returns that
   # parse. At end of stream, returns everything read so far.
   #
   # The original loop condition could never be false, so the only exits are
   # the parse match and (new) end of stream. Lines left over from an earlier
   # call are deliberately read through rather than treated as terminators.
   # NOTE(review): a response with no "[S ...]" line keeps reading until the
   # stream ends - confirm whether the parser can produce one.
   def gets
      @fullp = "".dup
      line = @lg.gets
      return @fullp if line.nil?

      @fullp << line
      loop do
         line = @lg.gets
         break if line.nil? # end of stream: stop instead of spinning on ""

         @fullp << line
         if @fullp =~ /^.*\n(\[S .*\]) \n$/
            print "[link.gets]: RETURNING: " + $1.to_s + "\n"
            return $1
         end
      end
      @fullp
   end

   # Sends s to the parser and returns the response read by #gets. Blank
   # input, parser directives ("!..."), words of 60+ characters and input
   # over 300 characters are rejected with a message and never sent.
   #
   # As before, an unfrozen s is stripped in place; frozen or nil input is
   # copied first instead of raising.
   def puts(s="")
      s = s.to_s
      s = s.dup if s.frozen?
      s.strip!
      p "PUTS S==" + s
      return "Sorry the input is blank." if ["\"\"", "\"", "''", ""].include?(s)
      return "I can't set link grammar directives." if s =~ /^!/
      return "#{$1} is too long." if s =~ /(\w{60,})/
      return "The input is too long." if s.size > 300

      @lg.puts(s)
      gets
   end

   # Get a more abstract linguistic representation of the parse: the nesting
   # of constituent labels only, e.g. "(NP (VP ADJP))".
   def get_simple_parse(input)
      parse = puts(input)
      sp = ["("]
      # String#each was removed in Ruby 1.9; each_line(' ') yields the same
      # space-terminated tokens.
      parse.each_line(' ') do |token|
         p 'TOKEN==' + token
         case token
         when /\[S(?![A-Z])/, /(?<![A-Z])S\]/
            # Sentence tags are dropped. Only the bare S tag: "[SBAR" must
            # not be swallowed here or its closing tag is left unbalanced.
            next
         when /\[([A-Z]+)/
            sp.push "(#{$1} "
         when /([A-Z]+)\]/
            sp.push ")"
         end
      end
      sp.push ")"
      r = sp.join
      p 'RETURNING' + r
      r.strip!

      # Each rewrite is repeated until it no longer applies.
      nil while r.sub!(/\(([A-Z]+)(?: )?\)/, '\1 ') # "(NP )" -> "NP "
      nil while r.sub!(/\)([A-Z]+)/, ') \1')        # ")NP"   -> ") NP"
      nil while r.sub!(/([A-Z]+) (\1)/, '\1')       # "NP NP" -> "NP"

      r.gsub!(/ \)/, ")")
      p 'RETURNING' + r
      r
   end

   private

   # Returns '' when text is one of the extraction failure messages,
   # otherwise text itself.
   def blank_if_failure(text)
      text =~ EXTRACTION_FAILURE ? '' : text
   end

end # class Link

# Interactive loop, run only when this file is executed directly: reads lines
# from standard input and prints the parser's response to each. Stops on a
# quit word, a blank line, or end of input.
if $0 == __FILE__
  link = Link.new
  quitwords = [":q", "quit", "exit", "bye"]
  # Group the alternatives so ^ and $ apply to every quit word; ungrouped,
  # any line merely containing "quit" or "exit" ended the session.
  quit_pattern = /^(?:#{quitwords.join('|')})$/i

  print "\n> "; $stdout.flush
  # gets returns nil at end of input; stop then instead of passing nil on.
  while (line = gets) && line !~ quit_pattern && line !~ /^$/
    print link.getResponse(line) + "\n"
    print "\n> "; $stdout.flush
  end
  print "Bye!"
end