% Chapter with its own authors inside an edited volume (Studies in
% Computational Intelligence). Typed as incollection rather than inbook:
% classic BibTeX styles ignore booktitle for inbook and warn when both
% author and editor are present, whereas incollection uses all of them.
% NOTE(review): address holds a country rather than a publisher city, and no
% series volume number is recorded -- confirm both against the publisher record.
@incollection{5d103b5a1f8549bea5c3c755cf3a112b,
  author    = {Srinivasan, Sriram and Ravi, Vinayakumar and Alazab, Mamoun and Ketha, Simran and Al-Zoubi, {Ala{\textquoteright} M.} and {Kotti Padannayil}, Soman},
  title     = {Spam Emails Detection Based on Distributed Word Embedding with Deep Learning},
  editor    = {Maleh, Yassine and Shojafar, Mohammad and Alazab, Mamoun and Baddi, Youssef},
  booktitle = {Machine Intelligence and Big Data Analytics for Cybersecurity Applications},
  series    = {Studies in Computational Intelligence},
  edition   = {1},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2021},
  pages     = {161--189},
  isbn      = {978-3-030-57023-1},
  doi       = {10.1007/978-3-030-57024-8_7},
  language  = {English},
  keywords  = {Content based filters, Cybercrime, Cybersecurity, Deep learning, Digital forensic techniques, Intrusion detection, Machine learning, Natural language processing, Spam, Text representation},
  abstract  = {In recent years, a rapid shift from general and random attacks to more sophisticated and advanced ones can be noticed. Unsolicited email or spam is one of the sources of many types of cybercrime techniques that use complicated methods to trick specific victims. Spam detection is one of the leading machine learning-oriented applications in the last decade. In this work, we present a new methodology for detecting spam emails based on deep learning architectures in the context of natural language processing (NLP). Past works on classical machine learning based spam email detection has relied on various feature engineering methods. Identifying a proper feature engineering method is a difficult task and moreover vulnerable in an adversarial environment. Our proposed method leverage the text representation of NLP and map towards spam email detection task. Various email representation methods are utilized to transform emails into email word vectors, as an essential step for machine learning algorithms. Moreover, optimal parameters are identified for many deep learning architectures and email representation by following the hyper-parameter tuning approach. The performance of many classical machine learning classifiers and deep learning architectures with various text representations are evaluated based on publicly available three email corpora. The experimental results show that the deep learning architectures performed better when compared to the standard machine learning classifiers in terms of accuracy, precision, recall, and F1-score. This is essentially due to the fact that the deep learning architectures facilitate to learn hierarchical, abstract and sequential feature representations of emails. Furthermore, word embedding with deep learning has performed well in comparison to the other classical email representation methods. The word embedding simplify to learn the syntactic, semantic and contextual similarity of emails. This endows word embedding with deep learning methods in spam email filtering in the real environment.},
}