from django.shortcuts import render
from django.http import HttpResponse
from InvertedIndexSimulator.inverted import main
import pandas as pd 
import xml.etree.ElementTree as et

def home(request):
    """Render the ``apps/home.html`` template for the given request."""
    template_name = 'apps/home.html'
    return render(request, template_name)

def dataframe(request):
    """Render the dataset as four parallel columns.

    Parses ``InvertedIndexSimulator/data/dataset_STBI.xml``, reads the
    ``DOCNO``, ``SONG``, ``ARTIST`` and ``LYRICS`` child of every element
    directly under the root, and hands ``apps/dataframe.html`` a context
    with one list per column (keys ``DOCNO``, ``SONG``, ``ARTIST``,
    ``LYRICS``).

    A record that lacks one of the children contributes ``None`` for that
    column instead of raising ``AttributeError``.
    """
    parse_data = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
    data = parse_data.getroot()

    df_cols = ["DOCNO", "SONG", "ARTIST", "LYRICS"]

    def child_text(node, tag):
        # The None check must be on the *found child*: ``find`` returns None
        # when the tag is absent, whereas ``node`` itself is never None here.
        child = node.find(tag)
        return child.text if child is not None else None

    rows = [{col: child_text(node, col) for col in df_cols} for node in data]

    frame = pd.DataFrame(rows, columns=df_cols)

    # Re-key the table by DOCNO: {docno: [song, artist, lyrics]}.
    dictionary = frame.set_index('DOCNO').T.to_dict('list')

    nomornya = list(dictionary.keys())
    nilai = list(dictionary.values())

    lagunya = [values[0] for values in nilai]
    artisnya = [values[1] for values in nilai]
    liriknya = [values[2] for values in nilai]

    context = {"DOCNO": nomornya, "SONG": lagunya, "ARTIST": artisnya, "LYRICS": liriknya}

    return render(request, 'apps/dataframe.html', context)

def preprocessing(request):
    """Render preprocessing step 1: ``main.remove_punc_tokenize`` per document.

    Every ``SONG`` text is concatenated with the ``LYRICS`` text at the same
    position in ``dataset_STBI.xml`` and the result is passed through
    ``main.remove_punc_tokenize``. The per-document results are given to
    ``apps/preprocessing.html`` as ``tokens_doc``.

    Raises:
        IndexError: if the file has fewer ``SONG`` than ``LYRICS`` elements.
    """
    tree = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")

    all_song = [node.text for node in tree.iter("SONG")]
    all_text = [node.text for node in tree.iter("LYRICS")]

    # An empty element has ``text is None``; treat it as "" so the
    # concatenation cannot fail with TypeError.
    # NOTE(review): title and lyrics are joined with no separator, so the
    # last title word fuses with the first lyric word unless the LYRICS text
    # starts with whitespace - confirm against how `main` builds its index.
    tokens_doc = [
        main.remove_punc_tokenize((all_song[i] or "") + (lyrics or ""))
        for i, lyrics in enumerate(all_text)
    ]

    context = {"tokens_doc": tokens_doc}

    return render(request, 'apps/preprocessing.html', context)


def preprocessing2(request):
    """Render preprocessing step 2: tokenisation followed by ``main.to_lower``.

    Every ``SONG`` text is concatenated with the ``LYRICS`` text at the same
    position in ``dataset_STBI.xml``, passed through
    ``main.remove_punc_tokenize`` and then ``main.to_lower``. The
    per-document results are given to ``apps/preprocessing2.html`` as
    ``tokens_doc``.

    Raises:
        IndexError: if the file has fewer ``SONG`` than ``LYRICS`` elements.
    """
    tree = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")

    all_song = [node.text for node in tree.iter("SONG")]
    all_text = [node.text for node in tree.iter("LYRICS")]

    tokens_doc = []
    for i, lyrics in enumerate(all_text):
        # An empty element has ``text is None``; treat it as "" so the
        # concatenation cannot fail with TypeError.
        # NOTE(review): no separator between title and lyrics - confirm this
        # matches how `main` builds its own index before changing it.
        sentence = (all_song[i] or "") + (lyrics or "")
        tokens = main.remove_punc_tokenize(sentence)
        tokens_doc.append(main.to_lower(tokens))

    context = {"tokens_doc": tokens_doc}

    return render(request, 'apps/preprocessing2.html', context)
    

def preprocessing3(request):
    """Render preprocessing step 3: stop-word and digit-token removal.

    Every ``SONG`` text is concatenated with the ``LYRICS`` text at the same
    position in ``dataset_STBI.xml`` and then run through, in order:
    ``main.remove_punc_tokenize``, ``main.to_lower``,
    ``main.stop_word_token``, and a filter that drops every token containing
    at least one digit character. The per-document results are given to
    ``apps/preprocessing3.html`` as ``tokens_doc``.

    Raises:
        IndexError: if the file has fewer ``SONG`` than ``LYRICS`` elements.
    """
    tree = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")

    all_song = [node.text for node in tree.iter("SONG")]
    all_text = [node.text for node in tree.iter("LYRICS")]

    tokens_doc = []
    for i, lyrics in enumerate(all_text):
        # An empty element has ``text is None``; treat it as "" so the
        # concatenation cannot fail with TypeError.
        # NOTE(review): no separator between title and lyrics - confirm this
        # matches how `main` builds its own index before changing it.
        sentence = (all_song[i] or "") + (lyrics or "")
        tokens = main.remove_punc_tokenize(sentence)
        tokens = main.to_lower(tokens)
        tokens = main.stop_word_token(tokens)
        # Discard any token that contains a digit anywhere in it.
        tokens = [w for w in tokens if not any(ch.isdigit() for ch in w)]
        tokens_doc.append(tokens)

    context = {"tokens_doc": tokens_doc}

    return render(request, 'apps/preprocessing3.html', context)

def preprocessing4(request):
    """Render preprocessing step 4: the full pipeline ending in stemming.

    Every ``SONG`` text is concatenated with the ``LYRICS`` text at the same
    position in ``dataset_STBI.xml`` and then run through, in order:
    ``main.remove_punc_tokenize``, ``main.to_lower``,
    ``main.stop_word_token``, a filter that drops every token containing at
    least one digit character, and ``main.stemming``. The per-document
    results are given to ``apps/preprocessing4.html`` as ``tokens_doc``.

    Raises:
        IndexError: if the file has fewer ``SONG`` than ``LYRICS`` elements.
    """
    tree = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")

    all_song = [node.text for node in tree.iter("SONG")]
    all_text = [node.text for node in tree.iter("LYRICS")]

    tokens_doc = []
    for i, lyrics in enumerate(all_text):
        # An empty element has ``text is None``; treat it as "" so the
        # concatenation cannot fail with TypeError.
        # NOTE(review): no separator between title and lyrics - confirm this
        # matches how `main` builds its own index before changing it.
        sentence = (all_song[i] or "") + (lyrics or "")
        tokens = main.remove_punc_tokenize(sentence)
        tokens = main.to_lower(tokens)
        tokens = main.stop_word_token(tokens)
        # Discard any token that contains a digit anywhere in it.
        tokens = [w for w in tokens if not any(ch.isdigit() for ch in w)]
        tokens_doc.append(main.stemming(tokens))

    context = {"tokens_doc": tokens_doc}

    return render(request, 'apps/preprocessing4.html', context)



def indexing(request):
    """Build a positional index over the dataset and render it.

    Each document (``SONG`` text + ``LYRICS`` text at the same position in
    ``dataset_STBI.xml``) is run through ``main.remove_punc_tokenize``,
    ``main.to_lower``, ``main.stop_word_token``, a digit-token filter and
    ``main.stemming``. The index maps every term to
    ``{DOCNO text: [1-based positions of the term in that document]}``.

    ``apps/indexing.html`` receives ``indexnya``: a list of
    ``(term, postings)`` pairs sorted by term.

    Raises:
        IndexError: if the file has fewer ``SONG`` than ``LYRICS`` elements,
            or a non-empty document has no ``DOCNO`` at its position.
    """
    from sklearn.feature_extraction.text import CountVectorizer

    tree = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")

    all_doc_no = [node.text for node in tree.iter("DOCNO")]
    all_song = [node.text for node in tree.iter("SONG")]
    all_text = [node.text for node in tree.iter("LYRICS")]

    tokens_doc = []
    for i, lyrics in enumerate(all_text):
        # An empty element has ``text is None``; treat it as "" so the
        # concatenation cannot fail with TypeError.
        # NOTE(review): no separator between title and lyrics - confirm this
        # matches how `main` builds its own index before changing it.
        sentence = (all_song[i] or "") + (lyrics or "")
        tokens = main.remove_punc_tokenize(sentence)
        tokens = main.to_lower(tokens)
        tokens = main.stop_word_token(tokens)
        # Discard any token that contains a digit anywhere in it.
        tokens = [w for w in tokens if not any(ch.isdigit() for ch in w)]
        tokens_doc.append(main.stemming(tokens))

    # One pass per document instead of scanning every document once per term.
    proximity_index = {}
    for n, tokens in enumerate(tokens_doc):
        positions = {}
        for pos, token in enumerate(tokens, start=1):
            positions.setdefault(token, []).append(pos)
        for token, where in positions.items():
            # ``all_doc_no`` holds plain strings (ElementTree ``.text``), so
            # the value itself is the key; ``.firstChild.data`` is a minidom
            # API and does not exist on ``str``.
            proximity_index.setdefault(token, {})[all_doc_no[n]] = where

    # The vocabulary also includes whatever CountVectorizer's default
    # tokenizer extracts from the joined token stream; such a term that never
    # occurs verbatim as a document token gets an empty postings dict.
    joined = ' '.join(token for tokens in tokens_doc for token in tokens)
    for token in CountVectorizer().build_tokenizer()(joined):
        proximity_index.setdefault(token, {})

    # Hand over every (term, postings) pair, not just the last one; an empty
    # index yields an empty list rather than an unbound variable.
    # NOTE(review): the template is not visible here - confirm it iterates
    # `indexnya` as (term, postings) pairs.
    indexnya = sorted(proximity_index.items())

    context = {"indexnya": indexnya}

    return render(request, 'apps/indexing.html', context)
    
def index(request):
    """Render the ``apps/index.html`` template for the given request."""
    template_name = 'apps/index.html'
    return render(request, template_name)


def lyric(request,id):
    """Render ``apps/lyric.html`` for the document identified by ``id``.

    ``main.detail(id)`` supplies a ``(text, judul)`` pair; both are passed
    to the template together with ``id`` under the keys ``text``, ``judul``
    and ``no``.
    """
    text, judul = main.detail(id)
    return render(
        request,
        'apps/lyric.html',
        {'no': id, 'judul': judul, 'text': text},
    )

def result(request):
    """Run a search for the submitted query and render the result page.

    On POST, the ``querysearch`` form field is passed to ``main.main`` and
    its return value is rendered by ``apps/result.html`` as ``hasil``,
    alongside the original ``query``.

    Any other HTTP method receives a 405 response listing POST as the only
    allowed method.
    """
    from django.http import HttpResponseNotAllowed

    if request.method != 'POST':
        # A view must return an HttpResponse; falling through would return
        # None, which Django rejects with a ValueError (HTTP 500).
        return HttpResponseNotAllowed(['POST'])

    query = request.POST['querysearch']
    hasil = main.main(query)

    content = {
        'hasil': hasil,
        'query': query,
    }
    return render(request, 'apps/result.html', content)
