Commit 8f565798 by Yolanda Nainggolan

added searching, fixed result and lyrics

parent 2d25e3c9
......@@ -55,15 +55,13 @@ footer {
border-radius: 15px;
padding: 20px;
margin-top: 10px;
width: auto;
width: 100%;
}
.carda {
box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2);
border-radius: 15px;
padding: 20px;
margin-top: 10px;
width: max-content;
table{
table-layout: fixed;
border: 1px solid black;
width: 100px;
}
.jumbotron {
......@@ -155,11 +153,6 @@ button:hover span:after {
right: 0;
}
table, th, td {
border: 1px solid black;
border-collapse: collapse;
}
form button {
display: inline-block;
......
/*
 * Site stylesheet: typography, navigation bar, fixed footer, cards,
 * hero banner ("jumbotron") and button styling.
 *
 * Earlier duplicate `body`, `footer` and `nav a` rule sets were removed:
 * every declaration in them was overridden by a later rule set with the
 * same selector, so they never affected the rendered page.
 */

/* Web font used by `body`; @import must stay ahead of all style rules. */
@import url('https://fonts.googleapis.com/css?family=Quicksand:400,700&display=swap');

h2, h3 {
  color: #00a2c6;
}

/* Specificity (pseudo-class) lets this win over the plain `nav a` rule below. */
nav a:hover {
  font-weight: bold;
}

.profile header {
  text-align: center;
}

/* Footer pinned to the bottom edge of the viewport, full width. */
footer {
  position: fixed;
  left: 0;
  bottom: 0;
  width: 100%;
  padding: 5px;
  color: white;
  background-color: #440f5c;
  text-align: center;
  font-weight: bold;
}

/* Banner-style image: fills the width, cropped around its centre. */
.featured-image {
  width: 100%;
  max-height: 300px;
  object-fit: cover;
  object-position: center;
}

.card {
  box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2);
  border-radius: 15px;
  padding: 20px;
  margin-top: 10px;
}

/* Hero banner: white text with a dark shadow over a remote background image. */
.jumbotron {
  font-size: 20px;
  padding: 60px;
  text-align: center;
  color: white;
  background-image: url(https://ak.picdn.net/assets/cms/music_subscription_homepage_banner.jpg);
  background-size: cover;
  background-repeat: no-repeat;
  text-shadow: black 0.3em 0.3em 0.3em;
}

/* Navigation bar sticks to the top of the viewport while scrolling. */
nav {
  background-color: #091729;
  padding: 5px;
  position: sticky;
  top: 0;
}

nav a {
  font-size: 18px;
  font-weight: 400;
  text-decoration: none;
  color: white;
}

body {
  font-family: 'Quicksand', sans-serif;
  margin: 0;
  padding: 0;
}

main {
  padding: 15px;
  overflow: auto;
}

#content {
  width: 100%;
}

/* Padding and borders count towards every element's declared size. */
* {
  box-sizing: border-box;
}

.button {
  display: inline-block;
  border-radius: 4px;
  background-color: #7c1ca6;
  border: none;
  color: #FFFFFF;
  text-align: center;
  font-size: 15px;
  padding: 20px;
  transition: all 0.5s;
  cursor: pointer;
  margin: 5px;
}

/*
 * Hover effect for a <span> inside a <button> element: the span gains
 * right padding and a "»" (U+00BB) fades in at its right edge.
 * NOTE(review): these selectors match the `button` element, not the
 * `.button` class above; confirm that is the intended target.
 */
button span {
  cursor: pointer;
  display: inline-block;
  position: relative;
  transition: 0.5s;
}

button span:after {
  content: '\00bb';
  position: absolute;
  opacity: 0;
  top: 0;
  right: -20px;
  transition: 0.5s;
}

button:hover span {
  padding-right: 25px;
}

button:hover span:after {
  opacity: 1;
  right: 0;
}

/* Compact, fixed-width variant for buttons placed inside forms. */
form button {
  display: inline-block;
  border-radius: 4px;
  background-color: #7c1ca6;
  border: none;
  color: #FFFFFF;
  text-align: center;
  font-size: 15px;
  padding: 10px;
  transition: all 0.5s;
  cursor: pointer;
  margin: 5px;
  width: 80px;
}
\ No newline at end of file
......@@ -5,23 +5,6 @@
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
<style>
#leftbox {
text-align: center;
float:left;
white-space: nowrap;
}
#middlebox{
float:left;
text-align: center;
white-space: nowrap;
}
#middleboxb{
float:left;
text-align: left;
white-space: nowrap;
}
</style>
</head>
<body>
......@@ -38,69 +21,23 @@
</div>
<center><h1>Dataset</h1><br></center>
<article class="carda" style="overflow-x:scroll; overflow-y:scroll;">
<div id = "leftbox">
<table>
<tr>
<th>DOCNO</th>
</tr>
{% for i in DOCNO %}
<tr>
<td>{{ i }}</td>
</tr>
{% endfor %}
</table>
</div>
<div id = "middlebox">
<table align="left">
<tr>
<th>SONG</th>
</tr>
{% for i in SONG %}
<tr>
<td>{{ i }}</td>
</tr>
{% endfor %}
</table>
</div>
<div id = "middlebox">
<table>
<tr>
<th>ARTIST</th>
</tr>
{% for i in ARTIST %}
<tr>
<td>{{ i }}</td>
</tr>
{% endfor %}
</table>
</div>
<div id = "middleboxb">
<table>
<tr>
<th>SONG</th>
<th>LYRICS</th>
</tr>
{% for i in LYRICS %}
{% for i in DOCNO %}
<tr>
<td>{{ i }}</td>
<td>{{ j }}</td>
<td>{{ k }}</td>
<td>{{ l }}</td>
</tr>
{% endfor %}
</table>
</div>
</article>
</article>
</div>
......
......@@ -4,24 +4,7 @@
<head>
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Song Lyric Search Engine</title>
<link href="../../static/assets/css/dataframe.min.css" rel="stylesheet">
<style>
#leftbox {
text-align: center;
float:left;
white-space: nowrap;
}
#middlebox{
float:left;
text-align: center;
white-space: nowrap;
}
#middleboxb{
float:left;
text-align: left;
white-space: nowrap;
}
</style>
<link href="../../static/assets/css/trying.min.css" rel="stylesheet">
</head>
<body>
......@@ -37,41 +20,20 @@
</div>
</div>
<center><h1>Proximity Index</h1><br></center>
<article class="carda" style="overflow-x:scroll; overflow-y:scroll;">
<div id = "leftbox">
<table>
<center><p style="font-size:40px;"><strong>Indexing</strong></p>
<table width="100%"; border="1px solid black">
<tr>
<th>Token</th>
</tr>
{% for i in words %}
<tr>
<td>{{ i }}</td>
</tr>
{% endfor %}
</table>
</div>
<div id = "middleboxb">
<table align="left">
<tr>
<th>Index</th>
</tr>
{% for i in freq %}
{% for key, values in res.items %}
<tr>
<td>{{ i }}</td>
<td>{{ key }}</td>
<td>{{ values }}</td>
</tr>
{% endfor %}
</table>
</div>
</article>
</article>
</div>
......
<!DOCTYPE html>
<html lang="en">
<head>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
......@@ -21,61 +21,27 @@
<!-- Custom styles for this template -->
<link href="../../static/assets/css/landing-page.min.css" rel="stylesheet">
</head>
</head>
<body>
<body>
<!-- Navigation -->
<nav class="navbar navbar-light bg-light static-top">
<div class="container">
<a class="navbar-brand" href="/">Cari Lagu</a>
<!-- <a class="btn btn-primary" href="#">Pilih Buku</a>
-->
<a class="navbar-brand" href="/">Search Simulator</a>
</div>
</nav>
<!-- Masthead -->
<!-- <header class="masthead text-white text-center">
<div class="overlay"></div>
<div class="container">
<div class="row">
<div class="col-xl-9 mx-auto">
<h1 class="mb-5">Silahkan masukkan lirik dari lagu yang ingin Anda temukan</h1>
</div>
<div class="col-md-10 col-lg-8 col-xl-7 mx-auto">
<form method="POST" action="/search">
<div class="form-row">
<div class="col-12 col-md-9 mb-2 mb-md-0">
<input type="text" class="form-control form-control-lg" name="querysearch" placeholder="Masukkan Query Anda...">
</div>
<div class="col-12 col-md-3">
<button type="submit" class="btn btn-block btn-lg btn-primary">Cari!</button>
</div>
</div>
</form>
</div>
</div>
</div>
</header> -->
<!-- Testimonials -->
<section class="testimonials text-center bg-light">
<div class="container">
<h2 class="mb-3">Lirik Lagu</h2>
<h4 class="mb-3">No.{{no}} - {{judul}} </h4>
<p>{{text}}</p>
<h4 class="mb-3">No. {{ no }} - {{ judul }} </h4>
<p>{{ lyrics }}</p>
</div>
</section>
<!-- Bootstrap core JavaScript -->
<script src="../../static/assets/vendor/jquery/jquery.min.js"></script>
<script src="../../static/assets/vendor/bootstrap/js/bootstrap.bundle.min.js"></script>
</body>
</body>
</html>
......@@ -24,41 +24,26 @@
</head>
<body>
<!-- Navigation -->
<nav class="navbar navbar-light bg-light static-top">
<div class="container">
<a class="navbar-brand" href="/">CariLagu</a>
<!-- <a class="btn btn-primary" href="#">Pilih Buku</a>
-->
<a class="navbar-brand" href="/">Search Simulator</a>
</div>
</nav>
<!-- Testimonials -->
<section class="testimonials text-center bg-light">
<div class="container">
<h2 class="mb-5">Lagu yang sesuai dengan "{{ query }}"</h2>
{% if hasil %}
<h2 class="mb-5">Lagu yang sesuai dengan query "{{ query }}"</h2>
<div class="row">
{% for i in hasil %}
{% for j in i %}
{% for key, values in res.items %}
<div class="col-lg-4">
<div class="testimonial-item mx-auto mb-5 mb-lg-0">
<img class="img-fluid rounded-circle mb-3" src="../../static/img/hkbp.jpg" alt="">
<h5><a href="/lyric">Lagu No:{{ j.docno }}</a></h5>
<h5>"{{ j.judul }}"</h5>
<p class="font-weight-light mb-0">score :{{ j.score }}</p>
<h5><a href="/lyric/{{ key }}">Lagu No: {{ key }}</a></h5>
<h5>"{{ values }}"</h5>
</div>
</div>
{% endfor %}
{% endfor %}
</div>
{% else %}
<h2 class="mb-5">Lagu dengan lirik: "{{ query }}" tidak ditemukan</h2>
{% endif %}
</div>
</section>
......
from django.shortcuts import render
from django.http import HttpResponse
from InvertedIndexSimulator.inverted import main
from xml.etree.ElementTree import ElementTree
from sklearn.feature_extraction.text import CountVectorizer
from itertools import count
import pandas as pd
import xml.etree.ElementTree as et
import string
import re
from sklearn.feature_extraction.text import CountVectorizer
import json
import xml.dom.minidom as minidom
import collections
from itertools import count
try:
from future_builtins import zip
except ImportError: # not 2.6+ or is 3.x
......@@ -17,138 +19,56 @@ except ImportError: # not 2.6+ or is 3.x
except ImportError:
pass
def home(request):
    """Serve the home page.

    Hands the incoming ``request`` to ``render`` together with the
    ``apps/home.html`` template; no template context is supplied.
    """
    template_name = 'apps/home.html'
    return render(request, template_name)
def dataframe(request):
parse_data = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
data = parse_data.getroot()
df_cols = ["DOCNO", "SONG", "ARTIST", "LYRICS"]
rows = []
for node in data:
s_docno = node.find("DOCNO").text if node is not None else None
s_song = node.find("SONG").text if node is not None else None
s_artist = node.find("ARTIST").text if node is not None else None
s_lyrics = node.find("LYRICS").text if node is not None else None
rows.append({"DOCNO": s_docno, "SONG": s_song, "ARTIST": s_artist, "LYRICS": s_lyrics})
DataFrame = pd.DataFrame(rows, columns = df_cols)
dictionary = DataFrame.set_index('DOCNO').T.to_dict('list')
nilai = list(dictionary.values())
nomornya = list(dictionary.keys())
lagunya = [sublist[0] for sublist in nilai]
artisnya = [sublist[1] for sublist in nilai]
liriknya = [sublist[2] for sublist in nilai]
context = {"DOCNO": nomornya, "SONG": lagunya, "ARTIST": artisnya, "LYRICS": liriknya}
context = main.show_dataframe(parse_data)
return render(request, 'apps/dataframe.html', context)
def preprocessing(request):
from xml.etree.ElementTree import ElementTree
tree = ElementTree()
tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
all_doc_no = []
all_song = []
all_text = []
all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc = main.data_var(tree)
for node in tree.iter("DOCNO"):
all_doc_no.append(node.text)
for node in tree.iter("SONG"):
all_song.append(node.text)
for node in tree.iter("LYRICS"):
all_text.append(node.text)
N_DOC = len(all_text)
all_sentence_doc = []
for i in range(N_DOC):
all_sentence_doc.append(all_song[i] + all_text[i])
tokens_doc = []
for i in range(N_DOC):
tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc[i]))
context = {"tokens_doc": tokens_doc}
context = {
"tokens_doc": tokens_doc
}
return render(request, 'apps/preprocessing.html', context)
def preprocessing2(request):
from xml.etree.ElementTree import ElementTree
tree = ElementTree()
tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
all_doc_no = []
all_song = []
all_text = []
for node in tree.iter("DOCNO"):
all_doc_no.append(node.text)
for node in tree.iter("SONG"):
all_song.append(node.text)
for node in tree.iter("LYRICS"):
all_text.append(node.text)
N_DOC = len(all_text)
all_sentence_doc = []
for i in range(N_DOC):
all_sentence_doc.append(all_song[i] + all_text[i])
all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc = main.data_var(tree)
tokens_doc = []
for i in range(N_DOC):
tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc[i]))
for i in range(N_DOC):
tokens_doc[i] = main.to_lower(tokens_doc[i])
context = {"tokens_doc": tokens_doc}
context = {
"tokens_doc": tokens_doc
}
return render(request, 'apps/preprocessing2.html', context)
def preprocessing3(request):
from xml.etree.ElementTree import ElementTree
tree = ElementTree()
tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
all_doc_no = []
all_song = []
all_text = []
for node in tree.iter("DOCNO"):
all_doc_no.append(node.text)
for node in tree.iter("SONG"):
all_song.append(node.text)
for node in tree.iter("LYRICS"):
all_text.append(node.text)
N_DOC = len(all_text)
all_sentence_doc = []
for i in range(N_DOC):
all_sentence_doc.append(all_song[i] + all_text[i])
all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc = main.data_var(tree)
tokens_doc = []
for i in range(N_DOC):
tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc[i]))
......@@ -161,37 +81,18 @@ def preprocessing3(request):
for i in range(N_DOC):
tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)])
context = {"tokens_doc": tokens_doc}
context = {
"tokens_doc": tokens_doc
}
return render(request, 'apps/preprocessing3.html', context)
def preprocessing4(request):
from xml.etree.ElementTree import ElementTree
tree = ElementTree()
tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
all_doc_no = []
all_song = []
all_text = []
for node in tree.iter("DOCNO"):
all_doc_no.append(node.text)
for node in tree.iter("SONG"):
all_song.append(node.text)
for node in tree.iter("LYRICS"):
all_text.append(node.text)
N_DOC = len(all_text)
all_sentence_doc = []
for i in range(N_DOC):
all_sentence_doc.append(all_song[i] + all_text[i])
all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc = main.data_var(tree)
tokens_doc = []
for i in range(N_DOC):
tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc[i]))
......@@ -207,33 +108,17 @@ def preprocessing4(request):
for i in range(N_DOC):
tokens_doc[i] = main.stemming(tokens_doc[i])
context = {"tokens_doc": tokens_doc}
context = {
"tokens_doc": tokens_doc
}
return render(request, 'apps/preprocessing4.html', context)
def indexing(request):
import string
import re
from sklearn.feature_extraction.text import CountVectorizer
import xml.dom.minidom as minidom
dcmnt_xml = minidom.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
all_doc_no = dcmnt_xml.getElementsByTagName('DOCNO')
all_profile = dcmnt_xml.getElementsByTagName('SONG')
all_date = dcmnt_xml.getElementsByTagName('ARTIST')
all_text = dcmnt_xml.getElementsByTagName('LYRICS')
all_pub = dcmnt_xml.getElementsByTagName('PUB')
all_page = dcmnt_xml.getElementsByTagName('PAGE')
N_DOC = len(all_doc_no)
all_sentence_doc_sample = []
for i in range(N_DOC):
sentence_doc_sample = ' '+ all_text[i].firstChild.data
all_sentence_doc_sample.append(sentence_doc_sample)
all_doc_no, N_DOC, all_sentence_doc_sample = main.load_data(dcmnt_xml)
tokens_doc = []
......@@ -249,65 +134,40 @@ def indexing(request):
for i in range(N_DOC):
tokens_doc[i] = main.stemming(tokens_doc[i])
all_tokens = []
for i in range(N_DOC):
for w in tokens_doc[i]:
all_tokens.append(w)
new_sentence = ' '.join([w for w in all_tokens])
for w in CountVectorizer().build_tokenizer()(new_sentence):
all_tokens.append(w)
all_tokens = set(all_tokens)
proximity_index = {}
for token in all_tokens:
dict_doc_position = {}
for n in range(N_DOC):
if(token in tokens_doc[n]):
dict_doc_position[all_doc_no[n].firstChild.data] = [i+1 for i, j in zip(count(), tokens_doc[n]) if j == token]
proximity_index[token] = dict_doc_position
proximity_index = collections.OrderedDict(sorted(proximity_index.items()))
import json
indexnya = json.loads(json.dumps(proximity_index))
res = main.indexing(N_DOC, tokens_doc, all_doc_no)
words = indexnya.keys()
freq = indexnya.values()
context = {"words": words, "freq": freq}
context = {
"res": res,
}
return render(request, 'apps/indexing.html', context)
def index(request):
    """Serve the index page.

    Hands the incoming ``request`` to ``render`` together with the
    ``apps/index.html`` template; no template context is supplied.
    """
    template_name = 'apps/index.html'
    return render(request, template_name)
def lyric(request,id):
    """Render the lyric page for a single song.

    ``id`` is forwarded unchanged to ``main.detail``; the value it returns
    is unpacked as a ``(text, judul)`` pair. The template
    ``apps/lyric.html`` receives the keys ``no`` (the given ``id``),
    ``judul`` and ``text``.
    """
    detail = main.detail(id)
    text, judul = detail
    page_context = dict(no=id, judul=judul, text=text)
    return render(request, 'apps/lyric.html', page_context)
def result(request):
#%%
# proximity_index = collections.OrderedDict(sorted(proximity_index.items()))
# for key, value in proximity_index.items():
# # print (key, value)
dcmnt_xml = minidom.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
if request.method == 'POST':
query = request.POST['querysearch']
hasil= main.main(query)
res = main.searching(dcmnt_xml, query)
content={
'hasil':hasil,
content = {
'res':res,
'query':query
}
return render(request, 'apps/result.html', content)
def lyric(request,id):
    """Render the lyric page for a single song.

    ``id`` is forwarded unchanged to ``main.detail``; the value it returns
    is unpacked as a ``(lyrics, judul)`` pair. The template
    ``apps/lyric.html`` receives the keys ``no`` (the given ``id``),
    ``judul`` and ``lyrics``.
    """
    detail = main.detail(id)
    lyrics, judul = detail
    page_context = dict(no=id, judul=judul, lyrics=lyrics)
    return render(request, 'apps/lyric.html', page_context)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment