Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
SearchEngine
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Rosa Delima Mendrofa
SearchEngine
Commits
8f565798
Commit
8f565798
authored
May 28, 2020
by
Yolanda Nainggolan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added searching, fixed result and lyrics
parent
2d25e3c9
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
548 additions
and
614 deletions
+548
-614
views.cpython-37.pyc
...e/InvertedIndexSimulator/__pycache__/views.cpython-37.pyc
+0
-0
main.cpython-37.pyc
...edIndexSimulator/inverted/__pycache__/main.cpython-37.pyc
+0
-0
main.py
SearchEngine/InvertedIndexSimulator/inverted/main.py
+256
-185
dataframe.min.css
...nvertedIndexSimulator/static/assets/css/dataframe.min.css
+5
-12
trying.min.css
...e/InvertedIndexSimulator/static/assets/css/trying.min.css
+159
-0
dataframe.html
...gine/InvertedIndexSimulator/templates/apps/dataframe.html
+16
-79
indexing.html
...ngine/InvertedIndexSimulator/templates/apps/indexing.html
+14
-52
lyric.html
...chEngine/InvertedIndexSimulator/templates/apps/lyric.html
+32
-66
result.html
...hEngine/InvertedIndexSimulator/templates/apps/result.html
+20
-35
views.py
SearchEngine/InvertedIndexSimulator/views.py
+46
-185
urls.cpython-37.pyc
SearchEngine/SearchEngine/__pycache__/urls.cpython-37.pyc
+0
-0
No files found.
SearchEngine/InvertedIndexSimulator/__pycache__/views.cpython-37.pyc
View file @
8f565798
No preview for this file type
SearchEngine/InvertedIndexSimulator/inverted/__pycache__/main.cpython-37.pyc
View file @
8f565798
No preview for this file type
SearchEngine/InvertedIndexSimulator/inverted/main.py
View file @
8f565798
...
@@ -2,19 +2,86 @@ resource_package = __name__
...
@@ -2,19 +2,86 @@ resource_package = __name__
import
string
import
string
import
re
import
re
import
collections
import
math
import
pandas
as
pd
import
json
import
xml.dom.minidom
as
minidom
import
xml.etree.ElementTree
as
et
from
xml.etree.ElementTree
import
ElementTree
from
sklearn.feature_extraction.text
import
CountVectorizer
from
sklearn.feature_extraction.text
import
CountVectorizer
from
nltk.corpus
import
stopwords
from
nltk.corpus
import
stopwords
from
nltk.tokenize
import
sent_tokenize
,
word_tokenize
from
nltk.tokenize
import
sent_tokenize
,
word_tokenize
from
Sastrawi.Stemmer.StemmerFactory
import
StemmerFactory
from
Sastrawi.Stemmer.StemmerFactory
import
StemmerFactory
from
itertools
import
count
from
itertools
import
count
import
collections
try
:
import
math
from
future_builtins
import
zip
import
xml.etree.ElementTree
as
et
except
ImportError
:
# not 2.6+ or is 3.x
from
xml.etree.ElementTree
import
ElementTree
try
:
from
itertools
import
izip
as
zip
# < 2.5 or 3.x
except
ImportError
:
pass
##############Show Dataframe########################
def show_dataframe(parse_data):
    """Build the template context for the dataset table.

    Walks every child of the root of ``parse_data`` (an object exposing
    ``getroot()``, e.g. an ``xml.etree.ElementTree.ElementTree``) and reads
    the text of its ``DOCNO``, ``SONG``, ``ARTIST`` and ``LYRICS`` children.

    Returns a dict with four parallel lists under the keys ``"DOCNO"``
    (converted to ``int``), ``"SONG"``, ``"ARTIST"`` and ``"LYRICS"``.

    A record that lacks SONG/ARTIST/LYRICS contributes a null entry for that
    field instead of raising ``AttributeError``.  DOCNO is still required:
    ``int()`` fails on a missing or non-numeric document number.
    """
    df_cols = ["DOCNO", "SONG", "ARTIST", "LYRICS"]

    rows = []
    for node in parse_data.getroot():
        row = {}
        for col in df_cols:
            # Guard the *child* lookup: ``node`` itself is never None while
            # iterating, but ``find`` returns None when the tag is absent.
            child = node.find(col)
            row[col] = child.text if child is not None else None
        rows.append(row)

    frame = pd.DataFrame(rows, columns=df_cols)
    # Keyed by DOCNO; each value is the list [SONG, ARTIST, LYRICS].
    by_docno = frame.set_index('DOCNO').T.to_dict('list')

    records = list(by_docno.values())
    context = {
        "DOCNO": [int(docno) for docno in by_docno],
        "SONG": [record[0] for record in records],
        "ARTIST": [record[1] for record in records],
        "LYRICS": [record[2] for record in records],
    }
    return context
##############N_DOC########################
def data_var(tree):
    """Load the song dataset into ``tree`` and return its flattened fields.

    ``tree`` must provide ``parse(path)`` and ``iter(tag)`` (an
    ``xml.etree.ElementTree.ElementTree`` does).  The dataset file
    ``InvertedIndexSimulator/data/dataset_STBI.xml`` is parsed into it.

    Returns a 5-tuple ``(all_doc_no, all_song, all_lyrics, N_DOC,
    all_sentence_doc)``: the text of every DOCNO, SONG and LYRICS element,
    the number of LYRICS elements, and for each document the song title
    concatenated directly with its lyrics.
    """
    tree.parse("InvertedIndexSimulator/data/dataset_STBI.xml")

    all_doc_no = [node.text for node in tree.iter("DOCNO")]
    all_song = [node.text for node in tree.iter("SONG")]
    all_lyrics = [node.text for node in tree.iter("LYRICS")]

    N_DOC = len(all_lyrics)

    # An empty element has ``text`` None; treat it as an empty string here so
    # one blank title or lyric does not abort the whole load with TypeError.
    all_sentence_doc = [
        (all_song[i] or "") + (all_lyrics[i] or "") for i in range(N_DOC)
    ]

    return all_doc_no, all_song, all_lyrics, N_DOC, all_sentence_doc
##############Remove Punctuation###################
def
remove_punc_tokenize
(
sentence
):
def
remove_punc_tokenize
(
sentence
):
tokens
=
[]
tokens
=
[]
for
punctuation
in
string
.
punctuation
:
for
punctuation
in
string
.
punctuation
:
sentence
=
sentence
.
replace
(
punctuation
,
" "
)
sentence
=
sentence
.
replace
(
punctuation
,
" "
)
...
@@ -29,24 +96,80 @@ def remove_punc_tokenize(sentence):
...
@@ -29,24 +96,80 @@ def remove_punc_tokenize(sentence):
def
to_lower
(
tokens
):
def
to_lower
(
tokens
):
tokens
=
[
x
.
lower
()
for
x
in
tokens
]
tokens
=
[
x
.
lower
()
for
x
in
tokens
]
return
tokens
return
tokens
##############Load Data########################
def load_data(dcmnt_xml):
    """Extract document numbers and lyric text from a DOM document.

    ``dcmnt_xml`` must provide ``getElementsByTagName`` (e.g. a document
    parsed with ``xml.dom.minidom``).

    Returns ``(all_doc_no, N_DOC, all_sentence_doc_sample)``: the DOCNO
    nodes, how many there are, and for each of the first ``N_DOC`` LYRICS
    nodes its text prefixed with a single space.
    """
    all_doc_no = dcmnt_xml.getElementsByTagName('DOCNO')
    all_text = dcmnt_xml.getElementsByTagName('LYRICS')

    N_DOC = len(all_doc_no)

    all_sentence_doc_sample = []
    for i in range(N_DOC):
        # An empty <LYRICS/> element has no child node at all; use an empty
        # text for it instead of failing on ``None.data``.
        first_child = all_text[i].firstChild
        lyrics = first_child.data if first_child is not None else ''
        all_sentence_doc_sample.append(' ' + lyrics)

    return all_doc_no, N_DOC, all_sentence_doc_sample
##############Indexing########################
def indexing(N_DOC, tokens_doc, all_doc_no):
    """Build a positional (proximity) index over the tokenized documents.

    Parameters:
        N_DOC: number of documents to index.
        tokens_doc: per-document token lists; ``tokens_doc[n]`` holds the
            string tokens of document ``n``.
        all_doc_no: per-document DOM nodes; the key used for document ``n``
            is ``all_doc_no[n].firstChild.data``.

    Returns a dict whose keys are the tokens in sorted order.  Each value is
    a dict mapping a document number to the 1-based positions at which the
    token occurs in that document.  Tokens produced only by the
    ``CountVectorizer`` tokenizer (never present verbatim in a document) map
    to an empty dict.
    """
    docs = [tokens_doc[i] for i in range(N_DOC)]

    vocabulary = [token for doc in docs for token in doc]
    # Re-tokenize the joined text with scikit-learn's default tokenizer and
    # add whatever it yields to the vocabulary as well.
    vocabulary.extend(
        CountVectorizer().build_tokenizer()(' '.join(vocabulary))
    )

    # Iterating the sorted vocabulary yields the final key order directly, so
    # no OrderedDict / JSON round-trip / copy loops are needed afterwards.
    proximity_index = {}
    for token in sorted(set(vocabulary)):
        positions_by_doc = {}
        for n, doc in enumerate(docs):
            positions = [
                pos for pos, word in enumerate(doc, start=1) if word == token
            ]
            if positions:
                positions_by_doc[all_doc_no[n].firstChild.data] = positions
        proximity_index[token] = positions_by_doc

    return proximity_index
def generate_ngrams(data, n):
    """Generate word n-grams for every document.

    Parameters:
        data: iterable of documents, each a sliceable sequence of string
            tokens.
        n: size of each n-gram.

    Returns ``(ngram, result)`` where ``result`` holds one list of
    space-joined n-grams per document and ``ngram`` is all of those lists
    concatenated in document order.  A document shorter than ``n`` (or any
    document when ``n`` is 0) contributes an empty list.
    """
    # n-grams per document: zip n progressively shifted views of the tokens.
    result = [
        [" ".join(gram) for gram in zip(*(doc[j:] for j in range(n)))]
        for doc in data
    ]

    # Flatten the per-document n-grams into a single list.
    ngram = [gram for doc_grams in result for gram in doc_grams]

    return ngram, result
from
nltk.corpus
import
stopwords
from
nltk.corpus
import
stopwords
stop_words
=
set
(
stopwords
.
words
(
'english'
))
stop_words
=
set
(
stopwords
.
words
(
'english'
))
...
@@ -63,190 +186,139 @@ def stemming(tokens):
...
@@ -63,190 +186,139 @@ def stemming(tokens):
tokens
[
i
]
=
stemmer
.
stem
(
tokens
[
i
])
tokens
[
i
]
=
stemmer
.
stem
(
tokens
[
i
])
return
tokens
return
tokens
def
main
(
query
):
tree
=
ElementTree
()
tree
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
all_doc_no
=
[]
all_song
=
[]
all_text
=
[]
for
node
in
tree
.
iter
(
"DOCNO"
):
all_doc_no
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"SONG"
):
all_song
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"LYRICS"
):
all_text
.
append
(
node
.
text
)
N_DOC
=
len
(
all_text
)
def
searching
(
dcmnt_xml
,
query
):
all_doc_no
=
dcmnt_xml
.
getElementsByTagName
(
'DOCNO'
)
all_song
=
dcmnt_xml
.
getElementsByTagName
(
'SONG'
)
all_lyrics
=
dcmnt_xml
.
getElementsByTagName
(
'LYRICS'
)
N_DOC
=
len
(
all_doc_no
)
all_sentence_doc
=
[]
all_sentence_doc
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
all_sentence_doc
.
append
(
all_song
[
i
]
+
all_text
[
i
])
sentence_doc
=
all_song
[
i
]
.
firstChild
.
data
+
' '
+
all_lyrics
[
i
]
.
firstChild
.
data
all_sentence_doc
.
append
(
sentence_doc
)
tokens_doc
=
[]
tokens_doc
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
.
append
(
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
tokens_doc
.
append
(
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
[
i
]
=
to_lower
(
tokens_doc
[
i
])
tokens_doc
[
i
]
=
to_lower
(
tokens_doc
[
i
])
stop_words
=
set
(
stopwords
.
words
(
'english'
))
stopping
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
temp
=
[]
tokens_doc
[
i
]
=
stop_word_token
(
tokens_doc
[
i
])
for
j
in
tokens_doc
[
i
]:
if
j
not
in
stop_words
:
temp
.
append
(
j
)
stopping
.
append
(
temp
)
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
[
i
]
=
([
w
for
w
in
stopping
[
i
]
if
not
any
(
j
.
isdigit
()
for
j
in
w
)])
tokens_doc
[
i
]
=
([
w
for
w
in
tokens_doc
[
i
]
if
not
any
(
j
.
isdigit
()
for
j
in
w
)])
f
actory
=
StemmerFactory
()
f
or
i
in
range
(
N_DOC
):
stemmer
=
factory
.
create_stemmer
(
)
tokens_doc
[
i
]
=
stemming
(
tokens_doc
[
i
]
)
stemming
=
[]
all_tokens
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
temp
=
[]
for
j
in
tokens_doc
[
i
]:
for
j
in
tokens_doc
[
i
]:
# print
(j)
all_tokens
.
append
(
j
)
temp
.
append
(
stemmer
.
stem
(
j
))
stemming
.
append
(
temp
)
new_sentences
=
' '
.
join
([
w
for
w
in
all_tokens
]
)
all_tokens
=
[]
for
j
in
CountVectorizer
()
.
build_tokenizer
()(
new_sentences
):
for
i
in
range
(
N_DOC
):
all_tokens
.
append
(
j
)
for
w
in
stemming
[
i
]:
all_tokens
.
append
(
w
)
all_tokens
=
set
(
all_tokens
)
proximity_index
=
{}
for
token
in
all_tokens
:
dict_doc_position
=
{}
for
n
in
range
(
N_DOC
):
if
(
token
in
tokens_doc
[
n
]):
dict_doc_position
[
all_doc_no
[
n
]
.
firstChild
.
data
]
=
[
i
+
1
for
i
,
j
in
zip
(
count
(),
tokens_doc
[
n
])
if
j
==
token
]
proximity_index
[
token
]
=
dict_doc_position
import
collections
proximity_index
=
collections
.
OrderedDict
(
sorted
(
proximity_index
.
items
()))
kunci
=
[]
nilai
=
[]
for
key
,
value
in
proximity_index
[
query
]
.
items
():
kunci
.
append
(
key
)
nilai
.
append
(
value
)
dict
=
{}
for
key
in
kunci
:
for
value
in
nilai
:
dict
[
key
]
=
value
nilai
.
remove
(
value
)
break
xtree
=
et
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
xroot
=
xtree
.
getroot
()
new_sentence
=
' '
.
join
([
w
for
w
in
all_tokens
])
df_cols
=
[
"SONG"
]
rows
=
[]
for
w
in
CountVectorizer
()
.
build_tokenizer
()(
new_sentence
):
for
node
in
xroot
:
all_tokens
.
append
(
w
)
lirik
=
node
.
find
(
"SONG"
)
.
text
if
node
is
not
None
else
None
rows
.
append
({
"SONG"
:
lirik
})
all_tokens
=
set
(
all_tokens
)
df
=
pd
.
DataFrame
(
rows
,
columns
=
df_cols
)
alls
=
[]
for
i
in
all_tokens
:
alls
.
append
(
i
)
queri
=
[]
spl
=
query
.
split
()
for
i
in
range
(
len
(
spl
)):
if
not
spl
[
i
]
.
isdigit
():
queri
.
append
(
spl
[
i
])
punc
=
[]
for
i
in
range
(
len
(
queri
)):
no_punc
=
""
for
j
in
range
(
len
(
queri
[
i
])):
if
queri
[
i
][
j
]
not
in
string
.
punctuation
:
no_punc
=
no_punc
+
queri
[
i
][
j
]
punc
.
append
(
no_punc
)
lower
=
[]
for
i
in
range
(
len
(
punc
)):
lower
.
append
(
punc
[
i
]
.
lower
())
stop
=
[]
for
i
in
range
(
len
(
lower
)):
if
lower
[
i
]
not
in
stop_words
:
stop
.
append
(
lower
[
i
])
stem
=
[]
for
i
in
range
(
len
(
stop
)):
stem
.
append
(
stemmer
.
stem
(
stop
[
i
]))
join_word
=
' '
.
join
([
w
for
w
in
stem
])
nomor
=
[]
for
i
in
dict
:
ngram
,
ngram_doc
=
generate_ngrams
(
stemming
,
len
(
stem
))
nomor
.
append
(
int
(
i
))
n_gram_index
=
{}
judul
=
[]
for
ngram_token
in
ngram
:
for
i
in
nomor
:
doc_no
=
[]
judul
.
append
(
df
[
'SONG'
][
i
-
1
])
for
i
in
range
(
N_DOC
):
if
(
ngram_token
in
ngram_doc
[
i
]):
hasil
=
{}
doc_no
.
append
(
all_doc_no
[
i
])
for
key
in
nomor
:
n_gram_index
[
ngram_token
]
=
doc_no
for
value
in
judul
:
hasil
[
key
]
=
value
judul
.
remove
(
value
)
break
numb
=
[]
tit
=
[]
df
=
[]
for
i
,
j
in
hasil
.
items
():
numb
.
append
(
i
)
tit
.
append
(
j
)
res
=
{}
for
key
in
numb
:
for
value
in
tit
:
res
[
key
]
=
value
tit
.
remove
(
value
)
break
return
res
for
i
in
range
(
N_DOC
):
def
detail
(
id
):
count
=
0
for
j
in
range
(
len
(
ngram_doc
[
i
])):
import
pandas
as
pd
if
join_word
==
ngram_doc
[
i
][
j
]:
import
xml.etree.ElementTree
as
et
count
+=
1
import
numpy
as
np
df
.
append
(
count
)
idf
=
[]
for
i
in
range
(
len
(
df
)):
try
:
idf
.
append
(
math
.
log10
(
N_DOC
/
df
[
i
]))
except
ZeroDivisionError
:
idf
.
append
(
str
(
0
))
#w(t, d)
#t = term
#d = document
wtd
=
[]
l
=
[]
for
i
in
range
(
N_DOC
):
dic
=
{}
tf
=
ngram_doc
[
i
]
.
count
(
join_word
)
# menghitung nilai tf
if
tf
!=
0
:
score
=
math
.
log10
(
tf
)
#log10(tf(t,d))
score
+=
1
# 1 + log(tf(t,d))
score
*=
idf
[
i
]
#tf * idf
idx
=
all_doc_no
[
i
]
judul
=
all_song
[
i
]
dic
[
'docno'
]
=
idx
dic
[
'judul'
]
=
judul
dic
[
'score'
]
=
score
l
.
append
(
dic
)
wtd
.
append
(
l
)
# [i+1] = defenisi nomor dokumen; score = wtd
# print(score)
hasil
=
[]
xtree
=
et
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
hasil
.
append
(
sorted
(
wtd
[
0
],
key
=
lambda
x
:
x
[
'score'
],
reverse
=
True
))
xroot
=
xtree
.
getroot
()
return
hasil
df_cols
=
[
"SONG"
,
"LYRICS"
]
rows
=
[]
def
detail
(
nomor
):
for
node
in
xroot
:
tree
=
et
()
judul
=
node
.
find
(
"SONG"
)
.
text
if
node
is
not
None
else
None
tree
.
parse
(
"apps/data/dataset_STBI.xml"
)
lirik
=
node
.
find
(
"LYRICS"
)
.
text
if
node
is
not
None
else
None
rows
.
append
({
"SONG"
:
judul
,
"LYRICS"
:
lirik
})
all_doc_no
=
[]
df
=
pd
.
DataFrame
(
rows
,
columns
=
df_cols
)
all_song
=
[]
all_text
=
[]
lyrics
=
df
[
'LYRICS'
][
id
-
1
]
judul
=
df
[
'SONG'
][
id
-
1
]
return
lyrics
,
judul
for
node
in
tree
.
iter
(
"DOCNO"
):
all_doc_no
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"SONG"
):
# all_song.append(node.text.replace("\n"," "))
all_song
.
append
(
node
.
text
)
head
=
all_song
for
node
in
tree
.
iter
(
"LYRICS"
):
# all_text.append(node.text.replace("\n"," "))
all_text
.
append
(
node
.
text
)
N_DOC
=
len
(
all_text
)
text
=
[]
judul
=
[]
hasil
=
[]
id
=
str
(
nomor
)
for
i
in
range
(
N_DOC
):
check
=
all_doc_no
[
i
]
if
check
==
id
:
text
=
all_text
[
i
]
judul
=
all_song
[
i
]
return
text
,
judul
\ No newline at end of file
SearchEngine/InvertedIndexSimulator/static/assets/css/dataframe.min.css
View file @
8f565798
...
@@ -55,15 +55,13 @@ footer {
...
@@ -55,15 +55,13 @@ footer {
border-radius
:
15px
;
border-radius
:
15px
;
padding
:
20px
;
padding
:
20px
;
margin-top
:
10px
;
margin-top
:
10px
;
width
:
auto
;
width
:
100%
;
}
}
.carda
{
table
{
box-shadow
:
0
4px
8px
0
rgba
(
0
,
0
,
0
,
0.2
);
table-layout
:
fixed
;
border-radius
:
15px
;
border
:
1px
solid
black
;
padding
:
20px
;
width
:
100px
;
margin-top
:
10px
;
width
:
max-content
;
}
}
.jumbotron
{
.jumbotron
{
...
@@ -155,11 +153,6 @@ button:hover span:after {
...
@@ -155,11 +153,6 @@ button:hover span:after {
right
:
0
;
right
:
0
;
}
}
table
,
th
,
td
{
border
:
1px
solid
black
;
border-collapse
:
collapse
;
}
form
button
{
form
button
{
display
:
inline-block
;
display
:
inline-block
;
...
...
SearchEngine/InvertedIndexSimulator/static/assets/css/trying.min.css
0 → 100644
View file @
8f565798
@import
url('https://fonts.googleapis.com/css?family=Quicksand:400,700&display=swap')
;
body
{
font-family
:
sans-serif
;
}
h2
,
h3
{
color
:
#00a2c6
}
footer
{
color
:
white
;
background-color
:
#591a75
}
nav
a
{
font-size
:
18px
;
font-weight
:
400
;
text-decoration
:
none
;
}
nav
a
:hover
{
font-weight
:
bold
;
}
.profile
header
{
text-align
:
center
;
}
footer
{
position
:
fixed
;
left
:
0
;
bottom
:
0
;
width
:
100%
;
padding
:
5px
;
color
:
white
;
background-color
:
#440f5c
;
text-align
:
center
;
font-weight
:
bold
;
}
.featured-image
{
width
:
100%
;
max-height
:
300px
;
object-fit
:
cover
;
object-position
:
center
;
}
.card
{
box-shadow
:
0
4px
8px
0
rgba
(
0
,
0
,
0
,
0.2
);
border-radius
:
15px
;
padding
:
20px
;
margin-top
:
10px
;
}
.jumbotron
{
font-size
:
20px
;
padding
:
60px
;
text-align
:
center
;
color
:
white
;
background-image
:
url(https://ak.picdn.net/assets/cms/music_subscription_homepage_banner.jpg)
;
background-size
:
cover
;
background-repeat
:
no-repeat
;
text-shadow
:
black
0.3em
0.3em
0.3em
;
}
nav
{
background-color
:
#091729
;
padding
:
5px
;
position
:
sticky
;
top
:
0
;
}
nav
a
{
font-size
:
18px
;
font-weight
:
400
;
text-decoration
:
none
;
color
:
white
;
}
body
{
font-family
:
'Quicksand'
,
sans-serif
;
margin
:
0
;
padding
:
0
;
}
main
{
padding
:
15px
;
overflow
:
auto
;
}
#content
{
width
:
100%
;
}
*
{
box-sizing
:
border-box
;
}
.button
{
display
:
inline-block
;
border-radius
:
4px
;
background-color
:
#7c1ca6
;
border
:
none
;
color
:
#FFFFFF
;
text-align
:
center
;
font-size
:
15px
;
padding
:
20px
;
transition
:
all
0.5s
;
cursor
:
pointer
;
margin
:
5px
;
}
button
span
{
cursor
:
pointer
;
display
:
inline-block
;
position
:
relative
;
transition
:
0.5s
;
}
button
span
:after
{
content
:
'\00bb'
;
position
:
absolute
;
opacity
:
0
;
top
:
0
;
right
:
-20px
;
transition
:
0.5s
;
}
button
:hover
span
{
padding-right
:
25px
;
}
button
:hover
span
:after
{
opacity
:
1
;
right
:
0
;
}
form
button
{
display
:
inline-block
;
border-radius
:
4px
;
background-color
:
#7c1ca6
;
border
:
none
;
color
:
#FFFFFF
;
text-align
:
center
;
font-size
:
15px
;
padding
:
10px
;
transition
:
all
0.5s
;
cursor
:
pointer
;
margin
:
5px
;
width
:
80px
;
}
\ No newline at end of file
SearchEngine/InvertedIndexSimulator/templates/apps/dataframe.html
View file @
8f565798
...
@@ -5,23 +5,6 @@
...
@@ -5,23 +5,6 @@
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1"
>
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1"
>
<title>
Song Lyric Search Engine
</title>
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
<style>
#leftbox
{
text-align
:
center
;
float
:
left
;
white-space
:
nowrap
;
}
#middlebox
{
float
:
left
;
text-align
:
center
;
white-space
:
nowrap
;
}
#middleboxb
{
float
:
left
;
text-align
:
left
;
white-space
:
nowrap
;
}
</style>
</head>
</head>
<body>
<body>
...
@@ -38,69 +21,23 @@
...
@@ -38,69 +21,23 @@
</div>
</div>
<center><h1>
Dataset
</h1><br></center>
<center><h1>
Dataset
</h1><br></center>
<article
class=
"carda"
style=
"overflow-x:scroll; overflow-y:scroll;"
>
<table>
<tr>
<
div
id =
"leftbox"
>
<
th>
DOCNO
</th>
<table
>
<th>
ARTIST
</th
>
<tr
>
<th>
SONG
</th
>
<th>
DOCNO
</th>
<th>
LYRICS
</th>
</tr>
</tr>
{% for i in DOCNO %}
{% for i in DOCNO %}
<tr>
<tr>
<td>
{{ i }}
</td>
<td>
{{ i }}
</td>
</tr>
<td>
{{ j }}
</td>
{% endfor %}
<td>
{{ k }}
</td>
<td>
{{ l }}
</td>
</table>
</tr>
</div>
{% endfor %}
</table>
<div
id =
"middlebox"
>
<table
align=
"left"
>
<tr>
<th>
SONG
</th>
</tr>
{% for i in SONG %}
<tr>
<td>
{{ i }}
</td>
</tr>
{% endfor %}
</table>
</div>
<div
id =
"middlebox"
>
<table>
<tr>
<th>
ARTIST
</th>
</tr>
{% for i in ARTIST %}
<tr>
<td>
{{ i }}
</td>
</tr>
{% endfor %}
</table>
</div>
<div
id =
"middleboxb"
>
<table>
<tr>
<th>
LYRICS
</th>
</tr>
{% for i in LYRICS %}
<tr>
<td>
{{ i }}
</td>
</tr>
{% endfor %}
</table>
</div>
</article>
</article>
</article>
</div>
</div>
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/indexing.html
View file @
8f565798
...
@@ -4,24 +4,7 @@
...
@@ -4,24 +4,7 @@
<head>
<head>
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1"
>
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1"
>
<title>
Song Lyric Search Engine
</title>
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
<link
href=
"../../static/assets/css/trying.min.css"
rel=
"stylesheet"
>
<style>
#leftbox
{
text-align
:
center
;
float
:
left
;
white-space
:
nowrap
;
}
#middlebox
{
float
:
left
;
text-align
:
center
;
white-space
:
nowrap
;
}
#middleboxb
{
float
:
left
;
text-align
:
left
;
white-space
:
nowrap
;
}
</style>
</head>
</head>
<body>
<body>
...
@@ -37,41 +20,20 @@
...
@@ -37,41 +20,20 @@
</div>
</div>
</div>
</div>
<center><h1>
Proximity Index
</h1><br></center>
<center><p
style=
"font-size:40px;"
><strong>
Indexing
</strong></p>
<article
class=
"carda"
style=
"overflow-x:scroll; overflow-y:scroll;"
>
<table
width=
"100%"
;
border=
"1px solid black"
>
<tr>
<div
id =
"leftbox"
>
<th>
Token
</th>
<table>
<th>
Index
</th>
<tr>
</tr>
<th>
Token
</th>
</tr>
{% for i in words %}
{% for key, values in res.items %}
<tr>
<tr>
<td>
{{ i }}
</td>
<td>
{{ key }}
</td>
</tr>
<td>
{{ values }}
</td>
{% endfor %}
</tr>
{% endfor %}
</table>
</table>
</div>
<div
id =
"middleboxb"
>
<table
align=
"left"
>
<tr>
<th>
Index
</th>
</tr>
{% for i in freq %}
<tr>
<td>
{{ i }}
</td>
</tr>
{% endfor %}
</table>
</div>
</article>
</article>
</article>
</div>
</div>
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/lyric.html
View file @
8f565798
<!DOCTYPE html>
<!DOCTYPE html>
<html
lang=
"en"
>
<html
lang=
"en"
>
<head>
<head>
<meta
charset=
"utf-8"
>
<meta
charset=
"utf-8"
>
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1, shrink-to-fit=no"
>
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1, shrink-to-fit=no"
>
<meta
name=
"description"
content=
""
>
<meta
name=
"description"
content=
""
>
<meta
name=
"author"
content=
""
>
<meta
name=
"author"
content=
""
>
<title>
Inverted Index
</title>
<title>
Inverted Index
</title>
<!-- Bootstrap core CSS -->
<!-- Bootstrap core CSS -->
<link
href=
"../../static/assets/vendor/bootstrap/css/bootstrap.min.css"
rel=
"stylesheet"
>
<link
href=
"../../static/assets/vendor/bootstrap/css/bootstrap.min.css"
rel=
"stylesheet"
>
<!-- Custom fonts for this template -->
<!-- Custom fonts for this template -->
<link
href=
"../../static/assets/vendor/fontawesome-free/css/all.min.css"
rel=
"stylesheet"
>
<link
href=
"../../static/assets/vendor/fontawesome-free/css/all.min.css"
rel=
"stylesheet"
>
<link
href=
"../../static/assets/vendor/simple-line-icons/css/simple-line-icons.css"
rel=
"stylesheet"
type=
"text/css"
>
<link
href=
"../../static/assets/vendor/simple-line-icons/css/simple-line-icons.css"
rel=
"stylesheet"
type=
"text/css"
>
<link
href=
"https://fonts.googleapis.com/css?family=Lato:300,400,700,300italic,400italic,700italic"
rel=
"stylesheet"
type=
"text/css"
>
<link
href=
"https://fonts.googleapis.com/css?family=Lato:300,400,700,300italic,400italic,700italic"
rel=
"stylesheet"
type=
"text/css"
>
<!-- Custom styles for this template -->
<!-- Custom styles for this template -->
<link
href=
"../../static/assets/css/landing-page.min.css"
rel=
"stylesheet"
>
<link
href=
"../../static/assets/css/landing-page.min.css"
rel=
"stylesheet"
>
</head>
</head>
<body>
<body>
<!-- Navigation -->
<nav
class=
"navbar navbar-light bg-light static-top"
>
<nav
class=
"navbar navbar-light bg-light static-top"
>
<div
class=
"container"
>
<div
class=
"container"
>
<a
class=
"navbar-brand"
href=
"/"
>
Search Simulator
</a>
<a
class=
"navbar-brand"
href=
"/"
>
Cari Lagu
</a>
</div>
<!-- <a class="btn btn-primary" href="#">Pilih Buku</a>
</nav>
-->
</div>
</nav>
<!-- Masthead -->
<section
class=
"testimonials text-center bg-light"
>
<!-- <header class="masthead text-white text-center">
<div
class=
"container"
>
<div class="overlay"></div>
<h2
class=
"mb-3"
>
Lirik Lagu
</h2>
<div class="container">
<h4
class=
"mb-3"
>
No. {{ no }} - {{ judul }}
</h4>
<div class="row">
<p>
{{ lyrics }}
</p>
<div class="col-xl-9 mx-auto">
</div>
<h1 class="mb-5">Silahkan masukkan lirik dari lagu yang ingin Anda temukan</h1>
</section>
</div>
<div class="col-md-10 col-lg-8 col-xl-7 mx-auto">
<form method="POST" action="/search">
<div class="form-row">
<div class="col-12 col-md-9 mb-2 mb-md-0">
<input type="text" class="form-control form-control-lg" name="querysearch" placeholder="Masukkan Query Anda...">
</div>
<div class="col-12 col-md-3">
<button type="submit" class="btn btn-block btn-lg btn-primary">Cari!</button>
</div>
</div>
</form>
</div>
</div>
</div>
</header> -->
<script
src=
"../../static/assets/vendor/jquery/jquery.min.js"
></script>
<script
src=
"../../static/assets/vendor/bootstrap/js/bootstrap.bundle.min.js"
></script>
<!-- Testimonials -->
</body>
<section
class=
"testimonials text-center bg-light"
>
<div
class=
"container"
>
<h2
class=
"mb-3"
>
Lirik Lagu
</h2>
<h4
class=
"mb-3"
>
No.{{no}} - {{judul}}
</h4>
<p>
{{text}}
</p>
</div>
</section>
<!-- Bootstrap core JavaScript -->
<script
src=
"../../static/assets/vendor/jquery/jquery.min.js"
></script>
<script
src=
"../../static/assets/vendor/bootstrap/js/bootstrap.bundle.min.js"
></script>
</body>
</html>
</html>
SearchEngine/InvertedIndexSimulator/templates/apps/result.html
View file @
8f565798
...
@@ -24,41 +24,26 @@
...
@@ -24,41 +24,26 @@
</head>
</head>
<body>
<body>
<nav
class=
"navbar navbar-light bg-light static-top"
>
<!-- Navigation -->
<div
class=
"container"
>
<nav
class=
"navbar navbar-light bg-light static-top"
>
<a
class=
"navbar-brand"
href=
"/"
>
Search Simulator
</a>
<div
class=
"container"
>
</div>
<a
class=
"navbar-brand"
href=
"/"
>
CariLagu
</a>
</nav>
<!-- <a class="btn btn-primary" href="#">Pilih Buku</a>
-->
<section
class=
"testimonials text-center bg-light"
>
</div>
<div
class=
"container"
>
</nav>
<h2
class=
"mb-5"
>
Lagu yang sesuai dengan query "{{ query }}"
</h2>
<!-- Testimonials -->
<div
class=
"row"
>
<section
class=
"testimonials text-center bg-light"
>
{% for key, values in res.items %}
<div
class=
"container"
>
<div
class=
"col-lg-4"
>
<h2
class=
"mb-5"
>
Lagu yang sesuai dengan "{{ query }}"
</h2>
<div
class=
"testimonial-item mx-auto mb-5 mb-lg-0"
>
{% if hasil %}
<h5><a
href=
"/lyric/{{ key }}"
>
Lagu No: {{ key }}
</a></h5>
<div
class=
"row"
>
<h5>
"{{ values }}"
</h5>
{% for i in hasil %}
</div>
{% for j in i %}
</div>
{% endfor %}
<div
class=
"col-lg-4"
>
</div>
<div
class=
"testimonial-item mx-auto mb-5 mb-lg-0"
>
</div>
<img
class=
"img-fluid rounded-circle mb-3"
src=
"../../static/img/hkbp.jpg"
alt=
""
>
<h5><a
href=
"/lyric"
>
Lagu No:{{ j.docno }}
</a></h5>
<h5>
"{{ j.judul }}"
</h5>
<p
class=
"font-weight-light mb-0"
>
score :{{ j.score }}
</p>
</div>
</div>
{% endfor %}
{% endfor %}
</div>
{% else %}
<h2
class=
"mb-5"
>
Lagu dengan lirik: "{{ query }}" tidak ditemukan
</h2>
{% endif %}
</div>
</section>
</section>
...
...
SearchEngine/InvertedIndexSimulator/views.py
View file @
8f565798
from
django.shortcuts
import
render
from
django.shortcuts
import
render
from
django.http
import
HttpResponse
from
django.http
import
HttpResponse
from
InvertedIndexSimulator.inverted
import
main
from
InvertedIndexSimulator.inverted
import
main
from
xml.etree.ElementTree
import
ElementTree
from
sklearn.feature_extraction.text
import
CountVectorizer
from
itertools
import
count
import
pandas
as
pd
import
pandas
as
pd
import
xml.etree.ElementTree
as
et
import
xml.etree.ElementTree
as
et
import
string
import
string
import
re
import
re
from
sklearn.feature_extraction.text
import
CountVectorizer
import
json
import
xml.dom.minidom
as
minidom
import
xml.dom.minidom
as
minidom
import
collections
import
collections
from
itertools
import
count
try
:
try
:
from
future_builtins
import
zip
from
future_builtins
import
zip
except
ImportError
:
# not 2.6+ or is 3.x
except
ImportError
:
# not 2.6+ or is 3.x
...
@@ -17,138 +19,56 @@ except ImportError: # not 2.6+ or is 3.x
...
@@ -17,138 +19,56 @@ except ImportError: # not 2.6+ or is 3.x
except
ImportError
:
except
ImportError
:
pass
pass
def
home
(
request
):
def
home
(
request
):
return
render
(
request
,
'apps/home.html'
)
return
render
(
request
,
'apps/home.html'
)
def
dataframe
(
request
):
def
dataframe
(
request
):
parse_data
=
et
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
parse_data
=
et
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
data
=
parse_data
.
getroot
()
context
=
main
.
show_dataframe
(
parse_data
)
df_cols
=
[
"DOCNO"
,
"SONG"
,
"ARTIST"
,
"LYRICS"
]
rows
=
[]
for
node
in
data
:
s_docno
=
node
.
find
(
"DOCNO"
)
.
text
if
node
is
not
None
else
None
s_song
=
node
.
find
(
"SONG"
)
.
text
if
node
is
not
None
else
None
s_artist
=
node
.
find
(
"ARTIST"
)
.
text
if
node
is
not
None
else
None
s_lyrics
=
node
.
find
(
"LYRICS"
)
.
text
if
node
is
not
None
else
None
rows
.
append
({
"DOCNO"
:
s_docno
,
"SONG"
:
s_song
,
"ARTIST"
:
s_artist
,
"LYRICS"
:
s_lyrics
})
DataFrame
=
pd
.
DataFrame
(
rows
,
columns
=
df_cols
)
dictionary
=
DataFrame
.
set_index
(
'DOCNO'
)
.
T
.
to_dict
(
'list'
)
nilai
=
list
(
dictionary
.
values
())
nomornya
=
list
(
dictionary
.
keys
())
lagunya
=
[
sublist
[
0
]
for
sublist
in
nilai
]
artisnya
=
[
sublist
[
1
]
for
sublist
in
nilai
]
liriknya
=
[
sublist
[
2
]
for
sublist
in
nilai
]
context
=
{
"DOCNO"
:
nomornya
,
"SONG"
:
lagunya
,
"ARTIST"
:
artisnya
,
"LYRICS"
:
liriknya
}
return
render
(
request
,
'apps/dataframe.html'
,
context
)
return
render
(
request
,
'apps/dataframe.html'
,
context
)
def
preprocessing
(
request
):
def
preprocessing
(
request
):
from
xml.etree.ElementTree
import
ElementTree
tree
=
ElementTree
()
tree
=
ElementTree
()
tree
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
all_doc_no
,
all_song
,
all_lyrics
,
N_DOC
,
all_sentence_doc
=
main
.
data_var
(
tree
)
all_doc_no
=
[]
all_song
=
[]
all_text
=
[]
for
node
in
tree
.
iter
(
"DOCNO"
):
all_doc_no
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"SONG"
):
all_song
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"LYRICS"
):
all_text
.
append
(
node
.
text
)
N_DOC
=
len
(
all_text
)
all_sentence_doc
=
[]
for
i
in
range
(
N_DOC
):
all_sentence_doc
.
append
(
all_song
[
i
]
+
all_text
[
i
])
tokens_doc
=
[]
tokens_doc
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
context
=
{
"tokens_doc"
:
tokens_doc
}
context
=
{
"tokens_doc"
:
tokens_doc
}
return
render
(
request
,
'apps/preprocessing.html'
,
context
)
return
render
(
request
,
'apps/preprocessing.html'
,
context
)
def
preprocessing2
(
request
):
def
preprocessing2
(
request
):
from
xml.etree.ElementTree
import
ElementTree
tree
=
ElementTree
()
tree
=
ElementTree
()
tree
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
all_doc_no
,
all_song
,
all_lyrics
,
N_DOC
,
all_sentence_doc
=
main
.
data_var
(
tree
)
all_doc_no
=
[]
all_song
=
[]
all_text
=
[]
for
node
in
tree
.
iter
(
"DOCNO"
):
all_doc_no
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"SONG"
):
all_song
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"LYRICS"
):
all_text
.
append
(
node
.
text
)
N_DOC
=
len
(
all_text
)
all_sentence_doc
=
[]
for
i
in
range
(
N_DOC
):
all_sentence_doc
.
append
(
all_song
[
i
]
+
all_text
[
i
])
tokens_doc
=
[]
tokens_doc
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
[
i
]
=
main
.
to_lower
(
tokens_doc
[
i
])
tokens_doc
[
i
]
=
main
.
to_lower
(
tokens_doc
[
i
])
context
=
{
"tokens_doc"
:
tokens_doc
}
context
=
{
"tokens_doc"
:
tokens_doc
}
return
render
(
request
,
'apps/preprocessing2.html'
,
context
)
return
render
(
request
,
'apps/preprocessing2.html'
,
context
)
def
preprocessing3
(
request
):
def
preprocessing3
(
request
):
from
xml.etree.ElementTree
import
ElementTree
tree
=
ElementTree
()
tree
=
ElementTree
()
tree
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
all_doc_no
,
all_song
,
all_lyrics
,
N_DOC
,
all_sentence_doc
=
main
.
data_var
(
tree
)
all_doc_no
=
[]
all_song
=
[]
all_text
=
[]
for
node
in
tree
.
iter
(
"DOCNO"
):
all_doc_no
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"SONG"
):
all_song
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"LYRICS"
):
all_text
.
append
(
node
.
text
)
N_DOC
=
len
(
all_text
)
all_sentence_doc
=
[]
for
i
in
range
(
N_DOC
):
all_sentence_doc
.
append
(
all_song
[
i
]
+
all_text
[
i
])
tokens_doc
=
[]
tokens_doc
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
...
@@ -161,37 +81,18 @@ def preprocessing3(request):
...
@@ -161,37 +81,18 @@ def preprocessing3(request):
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
[
i
]
=
([
w
for
w
in
tokens_doc
[
i
]
if
not
any
(
j
.
isdigit
()
for
j
in
w
)])
tokens_doc
[
i
]
=
([
w
for
w
in
tokens_doc
[
i
]
if
not
any
(
j
.
isdigit
()
for
j
in
w
)])
context
=
{
"tokens_doc"
:
tokens_doc
}
context
=
{
"tokens_doc"
:
tokens_doc
}
return
render
(
request
,
'apps/preprocessing3.html'
,
context
)
return
render
(
request
,
'apps/preprocessing3.html'
,
context
)
def
preprocessing4
(
request
):
def
preprocessing4
(
request
):
from
xml.etree.ElementTree
import
ElementTree
tree
=
ElementTree
()
tree
=
ElementTree
()
tree
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
all_doc_no
,
all_song
,
all_lyrics
,
N_DOC
,
all_sentence_doc
=
main
.
data_var
(
tree
)
all_doc_no
=
[]
all_song
=
[]
all_text
=
[]
for
node
in
tree
.
iter
(
"DOCNO"
):
all_doc_no
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"SONG"
):
all_song
.
append
(
node
.
text
)
for
node
in
tree
.
iter
(
"LYRICS"
):
all_text
.
append
(
node
.
text
)
N_DOC
=
len
(
all_text
)
all_sentence_doc
=
[]
for
i
in
range
(
N_DOC
):
all_sentence_doc
.
append
(
all_song
[
i
]
+
all_text
[
i
])
tokens_doc
=
[]
tokens_doc
=
[]
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
tokens_doc
.
append
(
main
.
remove_punc_tokenize
(
all_sentence_doc
[
i
]))
...
@@ -207,33 +108,17 @@ def preprocessing4(request):
...
@@ -207,33 +108,17 @@ def preprocessing4(request):
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
[
i
]
=
main
.
stemming
(
tokens_doc
[
i
])
tokens_doc
[
i
]
=
main
.
stemming
(
tokens_doc
[
i
])
context
=
{
"tokens_doc"
:
tokens_doc
}
context
=
{
"tokens_doc"
:
tokens_doc
}
return
render
(
request
,
'apps/preprocessing4.html'
,
context
)
return
render
(
request
,
'apps/preprocessing4.html'
,
context
)
def
indexing
(
request
):
def
indexing
(
request
):
import
string
import
re
from
sklearn.feature_extraction.text
import
CountVectorizer
import
xml.dom.minidom
as
minidom
dcmnt_xml
=
minidom
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
dcmnt_xml
=
minidom
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
all_doc_no
,
N_DOC
,
all_sentence_doc_sample
=
main
.
load_data
(
dcmnt_xml
)
all_doc_no
=
dcmnt_xml
.
getElementsByTagName
(
'DOCNO'
)
all_profile
=
dcmnt_xml
.
getElementsByTagName
(
'SONG'
)
all_date
=
dcmnt_xml
.
getElementsByTagName
(
'ARTIST'
)
all_text
=
dcmnt_xml
.
getElementsByTagName
(
'LYRICS'
)
all_pub
=
dcmnt_xml
.
getElementsByTagName
(
'PUB'
)
all_page
=
dcmnt_xml
.
getElementsByTagName
(
'PAGE'
)
N_DOC
=
len
(
all_doc_no
)
all_sentence_doc_sample
=
[]
for
i
in
range
(
N_DOC
):
sentence_doc_sample
=
' '
+
all_text
[
i
]
.
firstChild
.
data
all_sentence_doc_sample
.
append
(
sentence_doc_sample
)
tokens_doc
=
[]
tokens_doc
=
[]
...
@@ -249,65 +134,40 @@ def indexing(request):
...
@@ -249,65 +134,40 @@ def indexing(request):
for
i
in
range
(
N_DOC
):
for
i
in
range
(
N_DOC
):
tokens_doc
[
i
]
=
main
.
stemming
(
tokens_doc
[
i
])
tokens_doc
[
i
]
=
main
.
stemming
(
tokens_doc
[
i
])
all_tokens
=
[]
res
=
main
.
indexing
(
N_DOC
,
tokens_doc
,
all_doc_no
)
for
i
in
range
(
N_DOC
):
for
w
in
tokens_doc
[
i
]:
all_tokens
.
append
(
w
)
new_sentence
=
' '
.
join
([
w
for
w
in
all_tokens
])
for
w
in
CountVectorizer
()
.
build_tokenizer
()(
new_sentence
):
all_tokens
.
append
(
w
)
all_tokens
=
set
(
all_tokens
)
proximity_index
=
{}
for
token
in
all_tokens
:
dict_doc_position
=
{}
for
n
in
range
(
N_DOC
):
if
(
token
in
tokens_doc
[
n
]):
dict_doc_position
[
all_doc_no
[
n
]
.
firstChild
.
data
]
=
[
i
+
1
for
i
,
j
in
zip
(
count
(),
tokens_doc
[
n
])
if
j
==
token
]
proximity_index
[
token
]
=
dict_doc_position
proximity_index
=
collections
.
OrderedDict
(
sorted
(
proximity_index
.
items
()))
import
json
context
=
{
indexnya
=
json
.
loads
(
json
.
dumps
(
proximity_index
))
"res"
:
res
,
}
words
=
indexnya
.
keys
()
freq
=
indexnya
.
values
()
context
=
{
"words"
:
words
,
"freq"
:
freq
}
return
render
(
request
,
'apps/indexing.html'
,
context
)
return
render
(
request
,
'apps/indexing.html'
,
context
)
def
index
(
request
):
def
index
(
request
):
return
render
(
request
,
'apps/index.html'
)
return
render
(
request
,
'apps/index.html'
)
def
lyric
(
request
,
id
):
text
,
judul
=
main
.
detail
(
id
)
content
=
{
'no'
:
id
,
'judul'
:
judul
,
'text'
:
text
}
return
render
(
request
,
'apps/lyric.html'
,
content
)
def
result
(
request
):
def
result
(
request
):
#%%
dcmnt_xml
=
minidom
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
# proximity_index = collections.OrderedDict(sorted(proximity_index.items()))
# for key, value in proximity_index.items():
# # print (key, value)
if
request
.
method
==
'POST'
:
if
request
.
method
==
'POST'
:
query
=
request
.
POST
[
'querysearch'
]
query
=
request
.
POST
[
'querysearch'
]
hasil
=
main
.
main
(
query
)
res
=
main
.
searching
(
dcmnt_xml
,
query
)
content
=
{
content
=
{
'
hasil'
:
hasil
,
'
res'
:
res
,
'query'
:
query
'query'
:
query
}
}
return
render
(
request
,
'apps/result.html'
,
content
)
return
render
(
request
,
'apps/result.html'
,
content
)
def
lyric
(
request
,
id
):
lyrics
,
judul
=
main
.
detail
(
id
)
content
=
{
'no'
:
id
,
'judul'
:
judul
,
'lyrics'
:
lyrics
,
}
return
render
(
request
,
'apps/lyric.html'
,
content
)
\ No newline at end of file
SearchEngine/SearchEngine/__pycache__/urls.cpython-37.pyc
View file @
8f565798
No preview for this file type
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment