import java.util.*;

/*
 * SlideExtractor.java
 *
 */


/**
 *
 * While the TalkLoader does the prearranging for loading/scanning a talk,
 * like asking for slide-delimiting commands etc., the SlideExtractor does
 * the sophisticated work like parsing and indexing of the talk's slides.
 *
 * @author  Olle Nebendahl
 */

class SlideExtractor
{
    /** Characters that separate words when building an index (see {@link #indexWords}). */
    private static final java.util.regex.Pattern WORD_DELIMITERS =
        java.util.regex.Pattern.compile("[\\s.!,?:;'\"()+`\\[\\]]");

    /** Two consecutive literal backslashes. */
    private static final java.util.regex.Pattern DOUBLE_BACKSLASH =
        java.util.regex.Pattern.compile("\\\\\\\\");

    /** A single literal backslash. */
    private static final java.util.regex.Pattern BACKSLASH =
        java.util.regex.Pattern.compile("\\\\");

    /** Any kind of bracket, or a dot; these split a word into index tokens. */
    private static final java.util.regex.Pattern BRACKET_OR_DOT =
        java.util.regex.Pattern.compile("[\\[\\](){}.]");

    /** The slides created by the constructor, in the order they were found. */
    ILL foundSlides;

    /**
     * Extracts slides that are delimited by a start tag and a stop tag.
     *
     * The text is split at every start tag. The chunk in front of the first
     * start tag is not a slide and is skipped; every following chunk is cut
     * at its first stop tag and stored as a slide titled "slide #n".
     *
     * @param tr          supplies the start and stop tag
     * @param s           the complete text of the talk
     * @param subject     passed on to every created slide; its file name part
     *                    is also used when the talk itself is created
     * @param progressGUI window used to report the progress
     */
    SlideExtractor(TagRepresentation tr, String s, String subject,ProgressGUI progressGUI)
    {
        s = FOS.bracketsSave(s);
        String[] chunks = s.split(TagRepresentation.string2regex(tr.getStartTag()));
        // chunks[0] is the text before the first start tag, so it is not counted.
        int slideCount = Math.max(0, chunks.length - 1);
        foundSlides = new ILL();

        // The stop tag does not change while looping, so build its regex only once
        // (and only if there is a slide to cut).
        String stopRegex = null;
        if (slideCount > 0)
        {
            stopRegex = TagRepresentation.string2regex(tr.getStopTag());
        }

        for (int i = 1; i < chunks.length; i++)
        {
            showProgress(progressGUI, i, slideCount);
            // Limit 2: only the part in front of the first stop tag is of interest.
            String body = FOS.bracketsUnSave(chunks[i].split(stopRegex, 2)[0]);
            foundSlides.append(SlideWrapper.createAndFlush("slide #" + i, subject, body));
        }

        finishProgress(progressGUI, slideCount);
        createTalk(subject);
    }

    /**
     * Extracts slides that are introduced by a command followed by
     * brace-delimited arguments.
     *
     * The text is split at every occurrence of the command followed by "{".
     * The chunk in front of the first occurrence is skipped. For every other
     * chunk the first cr.i tokens are read; the token at position
     * cr.bodyTarget (counted from 1) becomes the slide body and the token at
     * position cr.titleTarget becomes its title. A slide without a title is
     * titled "slide #n".
     *
     * @param cr          the slide command; replaced by the result of
     *                    PrefSlideCommands.getWithTargets before it is used
     * @param s           the complete text of the talk
     * @param subject     passed on to every created slide; its file name part
     *                    is also used when the talk itself is created
     * @param progressGUI window used to report the progress
     */
    SlideExtractor(CommandRepresentation cr, String s, String subject, ProgressGUI progressGUI)
    {
        s = FOS.bracketsSave(s);
        cr = PrefSlideCommands.getWithTargets(cr);
        s = FOS.trimSlideCommand(cr.s, s);

        // NOTE(review): the command name is inserted into the regex unescaped;
        // a name containing regex meta characters would not be matched literally.
        String[] chunks = s.split("\\\\" + cr.s.substring(1) + "\\{");
        // chunks[0] is the text before the first command, so it is not counted.
        int slideCount = Math.max(0, chunks.length - 1);
        foundSlides = new ILL();

        for (int i = 1; i < chunks.length; i++)
        {
            // The opening brace was consumed by the split, so put it back.
            OLL tokens = BracketsTokenizer.getTokens("{" + chunks[i], "{", "}");
            tokens.reset();

            String title = null;
            String body = null;
            int tokenCount = cr.i;
            for (int position = 1; position <= tokenCount; position++)
            {
                String token = FOS.bracketsUnSave((String) tokens.getNext());
                if (position == cr.bodyTarget)
                {
                    body = token;
                }
                if (position == cr.titleTarget)
                {
                    title = token;
                }
            }
            if (title == null)
            {
                title = "slide #" + i;
            }

            foundSlides.append(SlideWrapper.createAndFlush(title, subject, body));
            showProgress(progressGUI, i, slideCount);
        }

        finishProgress(progressGUI, slideCount);
        createTalk(subject);
    }

    /** Shows "done of total" in the progress window and repaints it at once. */
    private static void showProgress(ProgressGUI progressGUI, int done, int total)
    {
        progressGUI.Label2.setText(done + " of " + total + " slides added to data base");
        progressGUI.pack();
        progressGUI.setVisible(true);
        progressGUI.update(progressGUI.getGraphics());
    }

    /** Shows the final number of slides and dismisses the progress window. */
    private static void finishProgress(ProgressGUI progressGUI, int total)
    {
        progressGUI.Label2.setText(total + " slides found and added to data base.");
        progressGUI.pack();
        progressGUI.goodbye();
    }

    /**
     * Passes the found slides to SlideConnection.slideOrderUpdate and, if at
     * least one slide exists, creates a TalkWrapper starting at the first one.
     */
    private void createTalk(String subject)
    {
        SlideConnection.slideOrderUpdate(foundSlides);
        foundSlides.reset();
        if (foundSlides.hasNext())
        {
            String fileName = (new java.io.File(subject)).getName();
            // The instance is not kept; presumably the constructor itself
            // stores the talk -- TODO confirm against TalkWrapper.
            new TalkWrapper("no title", "no subject", fileName, fileName, foundSlides.getNext());
        }
    }

    /**
     * Debug helper: prints every element of the set on its own line.
     *
     * @param ts the set to print
     */
    static void testOut(TreeSet ts)
    {
        Iterator it = ts.iterator();
        while (it.hasNext())
        {
            System.out.println(it.next());
        }
    }

    /**
     * Collects the index tokens of a text.
     *
     * The text is split at whitespace, punctuation, quotes, parentheses,
     * square brackets, '+' and '`'; every resulting word is handed to
     * {@link #indexTokenize}.
     *
     * @param s the text to index, may be null
     * @return the sorted set of tokens; empty if s is null
     */
    static TreeSet indexWords(String s)
    {
        TreeSet index = new TreeSet();
        if (s == null)
        {
            return index;
        }
        String[] words = WORD_DELIMITERS.split(s);
        for (int i = 0; i < words.length; i++)
        {
            indexTokenize(words[i], index);
        }
        return index;
    }

    /**
     * Debug helper: indexes the text and prints every token followed by " , ".
     *
     * @param s the text to index
     */
    static void testIndexWords(String s)
    {
        Iterator it = indexWords(s).iterator();
        while (it.hasNext())
        {
            System.out.print(it.next() + " , ");
        }
    }

    /**
     * Splits a word at every double backslash and passes each part to
     * {@link #indexTokenize2}.
     *
     * @param s      the word to tokenize; null is ignored
     * @param tokens the set that receives the tokens
     */
    static void indexTokenize(String s, TreeSet tokens)
    {
        if (s == null)
        {
            return;
        }
        String[] parts = DOUBLE_BACKSLASH.split(s);
        for (int i = 0; i < parts.length; i++)
        {
            indexTokenize2(parts[i], tokens);
        }
    }

    /**
     * Tokenizes a string that may contain a backslash.
     *
     * Strings that are null or shorter than four characters are ignored.
     * The text in front of the first backslash goes to
     * {@link #indexTokenize3}. The text behind it, if longer than three
     * characters, is divided by BracketsTokenizer.split and both resulting
     * pieces are tokenized recursively.
     *
     * @param s      the string to tokenize, may be null
     * @param tokens the set that receives the tokens
     */
    static void indexTokenize2(String s, TreeSet tokens)
    {
        if (s == null || s.length() < 4)
        {
            return;
        }
        String[] parts = BACKSLASH.split(s, 2);
        indexTokenize3(parts[0], tokens);
        if (parts.length < 2 || parts[1].length() <= 3)
        {
            return;
        }
        // Presumably element 0 is the first bracketed argument and element 1
        // the remaining text -- TODO confirm against BracketsTokenizer.split.
        String[] pieces = BracketsTokenizer.split(parts[1], "{", "}");
        if (pieces == null)
        {
            return;
        }
        // Do not rely on the array having two elements.
        if (pieces.length > 1)
        {
            indexTokenize2(pieces[1], tokens);
        }
        if (pieces.length > 0)
        {
            indexTokenize2(pieces[0], tokens);
        }
    }

    /**
     * Splits a string at brackets and dots and adds every piece that is
     * longer than three characters to the token set.
     *
     * @param s      the string to split
     * @param tokens the set that receives the tokens
     */
    static void indexTokenize3(String s, TreeSet tokens)
    {
        String[] pieces = BRACKET_OR_DOT.split(s);
        for (int i = 0; i < pieces.length; i++)
        {
            if (pieces[i].length() > 3)
            {
                tokens.add(pieces[i]);
            }
        }
    }

    /**
     * Reads the file named by the first argument (via
     * FOS.readFromFileWithoutTexComments) and prints its index tokens.
     *
     * @param a command line arguments; a[0] is the file to read
     */
    public static void main(String[] a)
    {
        if (a == null || a.length < 1)
        {
            System.err.println("usage: java SlideExtractor <file>");
            return;
        }
        testIndexWords(FOS.readFromFileWithoutTexComments(a[0]));
    }
}
