Tag: Quran

Find Similar Sentences in the Holy Quran: Python Example

# -*- coding: utf-8 -*-
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 
import pandas
# Arabic stop-word list shipped with NLTK; findsimilarity() drops these
# tokens from both sentences before comparing them.
arastopword = stopwords.words('arabic')  

# Load the corpus into a DataFrame with a single column named 'aya'.
# NOTE(review): presumably the file holds one aya per line; read_csv still
# splits on commas by default - confirm the text contains none.
names = ['aya']
data = pandas.read_csv('data/qr_with/quran.txt',  names=names)
print("All Ayat: ", len(data ))
print(data)

# Query sentence that every aya is compared against
Searchfor = 'هارون وزير فرعون'

# Similarity threshold: only ayat whose cosine similarity to the query is
# strictly greater than this value are kept in the result list.
simdegree = 0.2

# Accumulates [sentence, similarity] pairs for every match found so far.
Result = []


def findsimilarity(X, Y):
    """Score sentence Y against query X and record it in ``Result`` if similar.

    Both sentences are tokenized with ``word_tokenize`` and tokens found in
    ``arastopword`` are discarded. The remaining distinct tokens of each
    sentence are treated as a binary bag-of-words vector, so the cosine
    similarity reduces to::

        |X & Y| / sqrt(|X| * |Y|)

    Args:
        X: Query sentence.
        Y: Candidate sentence to compare with the query.

    Returns:
        None. When the similarity is strictly greater than the module-level
        ``simdegree``, ``[Y, round(similarity, 4)]`` is appended to the
        module-level ``Result`` list.
    """
    # Distinct non-stop-word tokens of each sentence.
    X_set = {w for w in word_tokenize(X) if w not in arastopword}
    Y_set = {w for w in word_tokenize(Y) if w not in arastopword}

    # For 0/1 vectors the dot product is the number of shared tokens and each
    # squared norm is simply the number of distinct tokens in that sentence.
    shared = len(X_set & Y_set)
    denominator = float((len(X_set) * len(Y_set)) ** 0.5)

    # A sentence that is empty or consists solely of stop words yields a zero
    # vector; treat its similarity as 0 instead of dividing by zero.
    cosine = shared / denominator if denominator else 0.0

    if cosine > simdegree:
        Result.append([Y, round(cosine, 4)])


# Score every aya in the corpus against the search query; matches are
# collected by findsimilarity() itself.
for aya in data['aya']:
    findsimilarity(Searchfor, aya)

def takeSecond(elem):
    """Sort key: return the item stored at index 1 of ``elem``."""
    second = elem[1]
    return second

# Order the matches by similarity score, lowest first
Result.sort(key=takeSecond)

# Print each match as: sentence, score, then an empty separator line
for sentence, score in Result:
    print(sentence)
    print(score)
    print('')
    
وأضل فرعون قومه وما هدى
0.25

قال فرعون وما رب العالمين
0.25

فأرسل فرعون في المدائن حاشرين
0.25

من فرعون إنه كان عاليا من المسرفين
0.25

اذهب إلى فرعون إنه طغى
0.2887

اذهبا إلى فرعون إنه طغى
0.2887

قوم فرعون ألا يتقون
0.2887

اذهب إلى فرعون إنه طغى
0.2887

هارون أخي
0.3536

فرعون وثمود
0.3536

{ Add a Comment }

Holy Quran Word Clouds with the word2vec Algorithm: Python Example

طريقة رسم الكلمات المتشابهة من القرآن الكريم على شكل word cloud باستخدام الفكتور مودل word2vec لحساب الكلمات المتشابهة. يمكنك تحميل المودل واتباع خطوات البرنامج للحصول على نفس النتائج.

Download the model:

from gensim.models import KeyedVectors
from bidi.algorithm import get_display
import arabic_reshaper
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from typing import List, Dict
# function to plot the word cloud
def plot_word_cloud(word_list: List[str], word_frequency: Dict[str, float], *,
                    font_path: str = 'tahoma', background_color: str = 'white',
                    width: int = 800, height: int = 300) -> None:
    """Draw a word cloud sized by ``word_frequency`` and show it with matplotlib.

    The cloud is laid out directly from ``word_frequency``. Earlier versions
    first generated a cloud from the joined ``word_list`` text and then called
    ``fit_words``, which recomputes the whole layout, so that first pass was
    discarded work and has been removed.

    Args:
        word_list: Unused; kept so existing callers keep working.
        word_frequency: Maps each word to its weight. Keys are drawn exactly
            as given, so Arabic words must already be reshaped and
            bidi-reordered by the caller.
        font_path: Font passed to ``WordCloud``; it must cover the script of
            the words being drawn.
        background_color: Background colour of the rendered image.
        width: Width of the rendered image in pixels.
        height: Height of the rendered image in pixels.
    """
    # Build the word cloud straight from the weights
    wordc = WordCloud(font_path=font_path, background_color=background_color,
                      width=width, height=height)
    wordc.fit_words(word_frequency)

    # Draw the word cloud without axes or padding around the image
    plt.imshow(wordc)
    plt.axis("off")
    plt.tight_layout(pad = 0)

    plt.show()
# Load the trained word-vector model from disk
# NOTE(review): `.wv.vocab` and `model.most_similar` below look like the
# pre-4.0 gensim API - confirm against the installed gensim version.
model = KeyedVectors.load('model/quran_w7_m15.bin')
print("Model loaded")

# Report the vocabulary size of the model
print ('Number of all words: ', len(model.wv.vocab))

# Word whose nearest neighbours are plotted
Word_to_plot = 'النهار'

# How many similar words to retrieve
retsize = 200

# (word, similarity) pairs for the words closest to Word_to_plot
temp_tuple = model.most_similar(positive=[Word_to_plot], negative=[], topn = retsize)

# Plain list of the neighbouring words, in the order returned
similar_words = [word for word, _score in temp_tuple]

# Weight table keyed by the display form of each word (reshaped, then
# bidi-reordered) with its similarity score as the weight
word_frequency = {
    get_display(arabic_reshaper.reshape(word)): score
    for word, score in temp_tuple
}

plot_word_cloud(similar_words, word_frequency)

Result:

# Alternative query word for the word-cloud script
# (presumably the setting behind a result image shown at this point - TODO confirm)
Word_to_plot = 'كريم' 

# Number of similar words to retrieve
retsize = 200
# Another alternative query word for the word-cloud script
# (presumably the setting behind a further result image - TODO confirm)
Word_to_plot = 'النار' 

# Number of similar words to retrieve
retsize = 200

{ Add a Comment }