package market.thread;

import java.io.*;
import market.util.*;
import java.net.*;
import market.*;
import market.util.url.*;
import market.util.text.*;

/**
 * Parse HTML or TEXT/LINKS 
 * Creation date: (1/2/00 9:21:52 PM)
 * @author: tin
 */
public class Parser extends Thread {

	public Queue htmlInStream;
	public Queue linkOutStream;	
	public Queue textOutStream;	
	
	public market.util.HtmlElement inHtml;
	public SrcElement baseSrcElem;
	
	public boolean working;
	public boolean readingHtmlStream;

/**
 * Parser constructor comment.
 */
public Parser() {
	super();
}
/**
 * This constructor get input pipe. By this pipe Parser recive HTML or TEXT/LINKS. 
 * Creation date: (12/27/99 6:42:25 PM)
 * @param num int ( Netrunner number )
 * @param threadGroup ThreadGroup 
 * @param htmlInPipe market.util.Queue
 * @param srcElem market.util.SrcElement
 * @exception java.io.IOException The exception description.
 * @exception java.io.StreamCorruptedException The exception description.
 */
public Parser( int num, ThreadGroup threadGroup, Queue htmlInPipe, SrcElement srcElem ) throws IOException, StreamCorruptedException {
	super( threadGroup, "Parser"+num);
	this.htmlInStream= htmlInPipe;
	this.baseSrcElem= srcElem;
	readingHtmlStream = false;

// Link chenal set 	
	linkOutStream = new Queue( ConfigMarket.PARSER_LINK_OUT );
// end Link chenal set 	

// Text chenal set 	

	textOutStream = new Queue( ConfigMarket.PARSER_TEXT_OUT );
// end Text chenal set 	

}
/**
 * Count source on SourceServer
 * Creation date: (12/25/99 7:23:37 PM)
 * @return boolean (true for success )
 * @param srcKey long
 * @param baseSrcKey int
 * @param market_key int
 * @param title String 
 */
public boolean countSource( long srcKey, long baseSrcKey, int market_key, String title, long parSrcKey  ) {

	StringBuffer ret_srcValue = new StringBuffer( Const.MAX_LEN_KEY );
	
	String encoded_title = URLEncoder.encode( title );

	try{
		
	  URL ServletURL = new URL( ConfigMarket.hostSourceServer +"?"+ Const.REQ_TYPE +"="+ Const.COUNT_SRC+ 
		  "&" + Const.SRC_KEY +"="+ srcKey + 
		  "&" + Const.BASE_SRC_KEY +"="+ baseSrcKey +
		  "&" + Const.PAR_SRC_KEY +"="+ parSrcKey +
		  "&" + Const.MARKET_KEY +"="+ market_key +
		  "&" + Const.SRC_TITLE +"="+ encoded_title ); 

	  InputStream in = ServletURL.openStream();
	  
	  int b; 
	
 	  while ((b =in.read())!= -1){ 
	 	  ret_srcValue.append((char)b);  
	  } 

	  in.close();
	  ConfigMarket.logWriter.log( getName()+": Count result: " + ret_srcValue.toString() , LogPrinter.DEBUG_2);
	  return Boolean.valueOf(ret_srcValue.toString()).booleanValue();

	}
	catch(IOException e){
		ConfigMarket.logWriter.log( getName()+": Can't connect SrcServer!", LogPrinter.ERROR);
		return false;
	}
	

}
/**
 * Return stream of links.
 * Creation date: (1/2/00 9:46:58 PM)
 * @return market.util.Queue
 */
public Queue getLinkInPipe() {
	return linkOutStream;
}
/**
 * Return stream of text docoments.
 * Creation date: (1/2/00 10:13:58 PM)
 * @return market.util.Queue
 */
public Queue getTextInPipe() {
	return textOutStream;
}
/**
 * Insert the method's description here.
 * Creation date: (13.1.00 20:32:07)
 * @return boolean
 */
public boolean isWait() {
	return readingHtmlStream;
}
/**
 * Kill Parser and all our slave.
 * Creation date: (03.3.2000 “. 04:34:59)
 */
public void kill() {	
	
	stop();
}
/**
 * Insert the method's description here.
 * Creation date: (11.2.2000 “. 21:42:06)
 * @param args java.lang.String[]
 */
public static void main(String[] args) {
	

}
/**
 * Send links to Netrunner
 * Creation date: (1/5/00 9:09:17 PM)
 * @param srcKey long
 * @param grabLevel int
 * @param langKey int
 * @param links java.lang.String
 */
public void parSendLinks( long srcKey, int grabLevel, int langKey, String links) {

	String linkValue, absLinkValue, baseUrl;
	int addGrabLevel = 0; 
//	System.out.println( " Test:" + srcKey );
//	System.out.println( " Test2:" + links );
	try{
		StringReader reader= new StringReader( links );
		LineNumberReader linksIn = new LineNumberReader( reader );
		
		linkValue= linksIn.readLine();
		if ( linkValue == null )
				return;	// exit links string is empty	
		if ( linkValue.endsWith( Const.BASE_URL ) ){
			baseUrl = linkValue.substring( 0, linkValue.length() - Const.BASE_URL.length() );
			linkValue= linksIn.readLine();
		}
		else{
			baseUrl = inHtml.srcString;
		}	
			
		while ( linkValue != null){
			
			if ( linkValue.endsWith( Const.FORM_URL ) ){
				linkValue = linkValue.substring( 0, linkValue.length() -  Const.FORM_URL.length() );
				addGrabLevel = 1;
			}
			else
				addGrabLevel = 0;
			  
			absLinkValue = Links.AbsoluteUrl( baseUrl, linkValue );
									
			linkValue= linksIn.readLine();
			
			if ( absLinkValue== null ) continue;

			if ( baseSrcElem.keepInDomain && 
					!Links.CheckKeepInDomain( baseSrcElem.srcString, absLinkValue) ){
						
				ConfigMarket.logWriter.log( getName()+": Src: " + absLinkValue
											+" is out of Domain. ", LogPrinter.DEBUG_1);
				continue;		
			}	

			if ( baseSrcElem.keepInPath && 
					!Links.CheckKeepInPath( baseSrcElem.srcString, absLinkValue) ){

				ConfigMarket.logWriter.log( getName()+": Src: " + absLinkValue
											+" is out of Path. ", LogPrinter.DEBUG_1);
				continue;		
			}	
			
			LinkElement curLink = new LinkElement( srcKey, langKey, absLinkValue, 
													grabLevel + addGrabLevel, srcKey );
		  	
			try{
				linkOutStream.writeObject( curLink ); // Set For Netrunner
				yield();
			}
			catch( Exception e ){
				ConfigMarket.logWriter.log( getName()+": Can't write in link Stream!", LogPrinter.ERROR);
			}
		}
	}	
	catch( IOException e ){
		ConfigMarket.logWriter.log( getName()+": Can't read from links!", LogPrinter.ERROR);
	}		

}
/**
 * Main function of Parser contein:  <br>
 * 1. Get dockoment from Downloader <br> 
 * 2. Check dockoment from DB or Internet is comming <br>
 * 3. Parse Html ( Internet doc ) or Split text and link ( DB doc ) <br>
 * 4. Count src on SourceServer <br>
 * 5. Send link to Netrunner <br>
 * 6. Send text to TextSplitter <br>
 * Creation date: (1/5/00 6:09:08 PM)
 */
public void run() {
	Runtime myRuntime = Runtime.getRuntime();
	HtmlParser htmlParser = new HtmlParser();
	boolean res;
	DataClass doc;
	int numEmptyLinkStream = 0;
	working = true;
	while (working) {
		if ((float) myRuntime.freeMemory() / myRuntime.totalMemory() < Const.MinFreeMemory) {
			ConfigMarket.logWriter.log(getName() + ": Total Memory: " + myRuntime.totalMemory() + " Free Memory: " + myRuntime.freeMemory(), LogPrinter.DEBUG);
			System.runFinalization();
		}
		try {
			// 1. Get dockoment from Downloader 
			try {
				inHtml = null;
				ConfigMarket.logWriter.log(getName() + ": Geting Next Document...", LogPrinter.DEBUG);
				readingHtmlStream = true;
				inHtml = (HtmlElement) htmlInStream.readObject();
				readingHtmlStream = false;
				if (inHtml == null) {
					working = false;
					continue;
				}
			} catch (Exception e) {
				ConfigMarket.logWriter.log(getName() + ": Empty link Stream ", LogPrinter.ERROR);
				if (++numEmptyLinkStream > Const.MAX_EMPTY)
					working = false;
				continue;
			}
			ConfigMarket.logWriter.log(getName() + ": Geted fromDB= " + inHtml.fromDB, LogPrinter.DEBUG);

			// 2. Check dockoment from DB or Internet is comming

			if (inHtml.fromDB) {
				// 3.Split text and link ( DB doc )
				// Split Text and link
				doc = TextLinks.Separate(inHtml.html);
				ConfigMarket.logWriter.log(getName() + ": Separateed document. ", LogPrinter.DEBUG);
			} else {
				// 3. Parse Html ( Internet doc )

				doc = htmlParser.splitHtml(inHtml.html); //   IMETO NA HTML FILA
				yield();
				ConfigMarket.logWriter.log(getName() + ": Split HTML document", LogPrinter.DEBUG);
				
				res = ConfigMarket.mDocsCacher.saveDocument(inHtml.srcKey, TextLinks.Collect(doc));
				ConfigMarket.logWriter.log(getName() + ": Save document in m_docs res : " + res, LogPrinter.DEBUG);
			}
			// * 4. Count source on SourceServer
			res = countSource(inHtml.srcKey, baseSrcElem.srcKey, ConfigMarket.marketKey, doc.titleString, inHtml.parSrcKey);
			ConfigMarket.logWriter.log( getName()+": Count result: " + res , LogPrinter.DEBUG);
			if (res) { // update data if count is Ok
				if (inHtml.srcKey == baseSrcElem.srcKey) {
					baseSrcElem.lastChecked = new java.sql.Date(System.currentTimeMillis());
				}
			}
			// * 5. Send link to Netrunner 

			parSendLinks(inHtml.srcKey, inHtml.grabLevel, inHtml.langKey, doc.linkString);
			ConfigMarket.logWriter.log(getName() + ": Links are send to Netrunnernum Of Text Out Element:" + linkOutStream.getNumOfElement(), LogPrinter.DEBUG);
			// * 6. Send text to TextSplitter 

			TextElement textDoc = new TextElement(inHtml.srcKey, inHtml.langKey, doc.textString);
			textOutStream.writeObject((Object) textDoc);
			yield();
			ConfigMarket.logWriter.log(getName() + ": Text are send to TextSlitter num Of Text Out Element:" + textOutStream.getNumOfElement(), LogPrinter.DEBUG);
		} catch ( ThreadDeath t) {
			ConfigMarket.logWriter.log(getName() + ": Force stop (kill):", LogPrinter.ERROR);
			t.printStackTrace( ConfigMarket.logWriter.getPrintWriter() );
			ConfigMarket.logWriter.log(getName() + ": Finish. ", LogPrinter.INFO);	
			return;	
		} catch ( Throwable t) {
			ConfigMarket.logWriter.log(getName() + ": Somting wrong after read from htmlInStream: "+t, LogPrinter.ERROR);
			t.printStackTrace( ConfigMarket.logWriter.getPrintWriter() );
			( (Graber)getThreadGroup() ).sendError();
		}
	}
	linkOutStream.writeObject((Object) null);
	textOutStream.writeObject((Object) null);
	ConfigMarket.logWriter.log(getName() + ": Finish. ", LogPrinter.INFO);
}
}

