Commit e954b1c7 by Yolanda Nainggolan

Final Project

parent b3b033bf
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
<DOC> <DOC>
<DOCNO> 1 </DOCNO> <DOCNO> 1 </DOCNO>
<RANK> 1 </RANK> <RANK> 1 </RANK>
<SONG> Woly Bully </SONG> <SONG> Woly Bully 1998 </SONG>
<ARTIST> Sam the Sham and the Pharaohs </ARTIST> <ARTIST> Sam the Sham and the Pharaohs </ARTIST>
<YEAR> 1965 </YEAR> <YEAR> 1965 </YEAR>
<LYRICS> Sam the sham miscellaneous wooly bully wooly bully sam the sham the pharaohs domingo samudio uno dos one two tres quatro matty told hatty about a thing she saw had two big horns and a wooly jaw wooly bully wooly bully wooly bully wooly bully wooly bully hatty told matty lets dont take no chance lets not belseven come and learn to dance wooly bully wooly bully wooly bully wooly bully wooly bully matty told hatty thats the thing to do get you someone really to pull the wool with you wooly bully wooly bully wooly bully wooly bully wooly bully lseven the letter l and the number 7 when typed they form a rough square l7 so the lyrics mean lets not be square </LYRICS> <LYRICS> Sam the sham miscellaneous wooly bully wooly bully sam the sham the pharaohs domingo samudio uno dos one two tres quatro matty told hatty about a thing she saw had two big horns and a wooly jaw wooly bully wooly bully wooly bully wooly bully wooly bully hatty told matty lets dont take no chance lets not belseven come and learn to dance wooly bully wooly bully wooly bully wooly bully wooly bully matty told hatty thats the thing to do get you someone really to pull the wool with you wooly bully wooly bully wooly bully wooly bully wooly bully lseven the letter l and the number 7 when typed they form a rough square l7 so the lyrics mean lets not be square </LYRICS>
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -45,12 +45,17 @@ def show_dataframe(parse_data): ...@@ -45,12 +45,17 @@ def show_dataframe(parse_data):
for i in range(0, len(nomornya)): for i in range(0, len(nomornya)):
nomornya[i] = int(nomornya[i]) nomornya[i] = int(nomornya[i])
lagunya = [sublist[0] for sublist in nilai] lagunya = [sublist[0] for sublist in nilai]
artisnya = [sublist[1] for sublist in nilai] #artisnya = [sublist[1] for sublist in nilai]
liriknya = [sublist[2] for sublist in nilai] liriknya = [sublist[2] for sublist in nilai]
context = {"DOCNO": nomornya, "SONG": lagunya, "ARTIST": artisnya, "LYRICS": liriknya} res = {}
for key in lagunya:
for value in liriknya:
res[key] = value
liriknya.remove(value)
break
return context return res
##############N_DOC######################## ##############N_DOC########################
def data_var(tree): def data_var(tree):
...@@ -208,14 +213,8 @@ def proximity(dcmnt_xml, query): ...@@ -208,14 +213,8 @@ def proximity(dcmnt_xml, query):
tokens_doc[i] = to_lower(tokens_doc[i]) tokens_doc[i] = to_lower(tokens_doc[i])
for i in range(N_DOC): for i in range(N_DOC):
tokens_doc[i] = stop_word_token(tokens_doc[i])
for i in range(N_DOC):
tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)]) tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)])
for i in range(N_DOC):
tokens_doc[i] = stemming(tokens_doc[i])
all_tokens =[] all_tokens =[]
for i in range(N_DOC): for i in range(N_DOC):
for j in tokens_doc[i]: for j in tokens_doc[i]:
...@@ -343,14 +342,8 @@ def phrase(dcmnt_xml, query): ...@@ -343,14 +342,8 @@ def phrase(dcmnt_xml, query):
tokens_doc[i] = to_lower(tokens_doc[i]) tokens_doc[i] = to_lower(tokens_doc[i])
for i in range(N_DOC): for i in range(N_DOC):
tokens_doc[i] = stop_word_token(tokens_doc[i])
for i in range(N_DOC):
tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)]) tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)])
for i in range(N_DOC):
tokens_doc[i] = stemming(tokens_doc[i])
all_tokens =[] all_tokens =[]
for i in range(N_DOC): for i in range(N_DOC):
for j in tokens_doc[i]: for j in tokens_doc[i]:
......
...@@ -32,12 +32,14 @@ ...@@ -32,12 +32,14 @@
<center><p style="font-size:40px;"><strong>Data</strong></p> <center><p style="font-size:40px;"><strong>Data</strong></p>
<table> <table>
<tr> <tr>
<th>LYRICS</th> <th>Title</th>
<th>Lyric</th>
</tr> </tr>
{% for i in LYRICS %} {% for key, values in res.items %}
<tr> <tr>
<td>{{ i }}</td> <td>{{ key }}</td>
<td>{{ values }}</td>
</tr> </tr>
{% endfor %} {% endfor %}
</table> </table>
...@@ -60,6 +62,21 @@ ...@@ -60,6 +62,21 @@
function pageRedirect_next() { function pageRedirect_next() {
window.location.href = "/preprocessing"; window.location.href = "/preprocessing";
} }
var tables = document.getElementsByTagName('table');
var table = tables[tables.length - 1];
var rows = table.rows;
for(var i = 0, td; i < rows.length; i++){
td = document.createElement('td');
if(i == 0){
td.appendChild(document.createTextNode("DocNo"));
}
else{
td.appendChild(document.createTextNode(i));
}
rows[i].insertBefore(td, rows[i].firstChild);
}
</script> </script>
</html> </html>
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
<main> <main>
<div id="content"> <div id="content">
<article class="card"> <article class="card">
<center><h1>Pilih Metode Searching</h1><br> <center><br>
<table> <table>
<tr> <tr>
<td><button onclick="data()"class="button" style="vertical-align:middle"><span> Mulai </span></button></td> <td><button onclick="data()"class="button" style="vertical-align:middle"><span> Mulai </span></button></td>
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
<nav class="navbar navbar-light bg-light static-top"> <nav class="navbar navbar-light bg-light static-top">
<div class="container"> <div class="container">
<a class="navbar-brand" href="/">Search Simulator</a> <a class="navbar-brand" href="/search">Search Simulator</a>
</div> </div>
</nav> </nav>
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
</div> </div>
</div> </div>
<center><p style="font-size:40px;"><strong>Text Preprocessing - 3</strong></p> <center><p style="font-size:40px;"><strong>Text Preprocessing - 3</strong></p>
<p><strong>After Stopwords Removal</strong></p><br></center> <p><strong>After Numbers Removal</strong></p><br></center>
<table style="width:100%"> <table style="width:100%">
<tr> <tr>
<th>All tokens for each document</th> <th>All tokens for each document</th>
...@@ -57,7 +57,7 @@ ...@@ -57,7 +57,7 @@
} }
function pageRedirect_next() { function pageRedirect_next() {
window.location.href = "/preprocessing4"; window.location.href = "/search";
} }
</script> </script>
......
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
<body> <body>
<nav class="navbar navbar-light bg-light static-top"> <nav class="navbar navbar-light bg-light static-top">
<div class="container"> <div class="container">
<a class="navbar-brand" href="/">Search Simulator</a> <a class="navbar-brand" href="/search">Search Simulator</a>
</div> </div>
</nav> </nav>
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
<script> <script>
function pageRedirect_prev() { function pageRedirect_prev() {
window.location.href = "/preproseccing4"; window.location.href = "/preprocessing3";
} }
function proximity() { function proximity() {
......
...@@ -24,7 +24,11 @@ def home(request): ...@@ -24,7 +24,11 @@ def home(request):
def dataframe(request): def dataframe(request):
parse_data = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml") parse_data = et.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
context = main.show_dataframe(parse_data) res = main.show_dataframe(parse_data)
context = {
"res": res
}
return render(request, 'apps/dataframe.html', context) return render(request, 'apps/dataframe.html', context)
...@@ -73,9 +77,6 @@ def preprocessing3(request): ...@@ -73,9 +77,6 @@ def preprocessing3(request):
for i in range(N_DOC): for i in range(N_DOC):
tokens_doc[i] = main.to_lower(tokens_doc[i]) tokens_doc[i] = main.to_lower(tokens_doc[i])
for i in range(N_DOC):
tokens_doc[i] = main.stop_word_token(tokens_doc[i])
for i in range(N_DOC): for i in range(N_DOC):
tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)]) tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)])
...@@ -97,9 +98,6 @@ def preprocessing4(request): ...@@ -97,9 +98,6 @@ def preprocessing4(request):
for i in range(N_DOC): for i in range(N_DOC):
tokens_doc[i] = main.to_lower(tokens_doc[i]) tokens_doc[i] = main.to_lower(tokens_doc[i])
for i in range(N_DOC):
tokens_doc[i] = main.stop_word_token(tokens_doc[i])
for i in range(N_DOC): for i in range(N_DOC):
tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)]) tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)])
...@@ -125,13 +123,10 @@ def indexing(request): ...@@ -125,13 +123,10 @@ def indexing(request):
tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc_sample[i])) tokens_doc.append(main.remove_punc_tokenize(all_sentence_doc_sample[i]))
for i in range(N_DOC): for i in range(N_DOC):
tokens_doc[i] = main.stop_word_token(tokens_doc[i]) tokens_doc[i] = main.to_lower(tokens_doc[i])
for i in range(N_DOC): for i in range(N_DOC):
tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)]) tokens_doc[i] = ([w for w in tokens_doc[i] if not any(j.isdigit() for j in w)])
for i in range(N_DOC):
tokens_doc[i] = main.stemming(tokens_doc[i])
res = main.indexing(N_DOC, tokens_doc, all_doc_no) res = main.indexing(N_DOC, tokens_doc, all_doc_no)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment