From a19b9401e5555786b7738be51e6b22808a750b21 Mon Sep 17 00:00:00 2001 From: netkas Date: Sun, 27 Dec 2020 23:08:53 -0500 Subject: [PATCH] Added SpamDetection --- Makefile | 17 ++++- services/language_detection/setup.py | 1 - services/spam_detection/MANIFEST.in | 5 ++ services/spam_detection/README.md | 59 ++++++++++++++++ .../coffeehouse_spamdetection/__init__.py | 7 ++ .../coffeehouse_spamdetection/__main__.py | 70 +++++++++++++++++++ .../coffeehouse_spamdetection/main.py | 27 +++++++ .../coffeehouse_spamdetection/server.py | 55 +++++++++++++++ services/spam_detection/requirements.txt | 2 + services/spam_detection/setup.py | 31 ++++++++ 10 files changed, 272 insertions(+), 2 deletions(-) create mode 100644 services/spam_detection/MANIFEST.in create mode 100644 services/spam_detection/README.md create mode 100644 services/spam_detection/coffeehouse_spamdetection/__init__.py create mode 100644 services/spam_detection/coffeehouse_spamdetection/__main__.py create mode 100644 services/spam_detection/coffeehouse_spamdetection/main.py create mode 100644 services/spam_detection/coffeehouse_spamdetection/server.py create mode 100644 services/spam_detection/requirements.txt create mode 100644 services/spam_detection/setup.py diff --git a/Makefile b/Makefile index 4ac13e63..4ed5fbe7 100644 --- a/Makefile +++ b/Makefile @@ -25,6 +25,9 @@ clean_his: clean_langdetect: rm -rf services/language_detection/build services/language_detection/dist services/language_detection/coffeehouse_languagedetection.egg-info +clean_spamdetect: + rm -rf services/spam_detection/build services/spam_detection/dist services/spam_detection/coffeehouse_spamdetection.egg-info + clean: make clean_apt clean_stopwords clean_tokenizer clean_nlpfr make clean_dltc @@ -32,6 +35,7 @@ clean: make clean_alg make clean_rf make clean_langdetect + make clean_spamdetect # ====================================================================================================================== @@ -66,6 +70,9 @@ build_rf: 
build_langdetect: cd services/language_detection; python3 setup.py build; python3 setup.py sdist +build_spamdetect: + cd services/spam_detection; python3 setup.py build; python3 setup.py sdist + build: make build_nlpfr make build_his @@ -73,6 +80,7 @@ build: make build_alg make build_rf make build_langdetect + make build_spamdetect # ====================================================================================================================== @@ -108,6 +116,9 @@ install_rf: install_langdetect: cd services/language_detection; python3 setup.py install +install_spamdetect: + cd services/spam_detection; python3 setup.py install + install: make install_rf make install_nlpfr @@ -115,6 +126,7 @@ install: make install_dltc make install_alg make install_langdetect + make install_spamdetect # ====================================================================================================================== @@ -133,4 +145,7 @@ system_prep_gcc: # ====================================================================================================================== start_langdetect: - python3 -m coffeehouse_languagedetection --start-server \ No newline at end of file + python3 -m coffeehouse_languagedetection --start-server + +start_spamdetect: + python3 -m coffeehouse_spamdetection --start-server \ No newline at end of file diff --git a/services/language_detection/setup.py b/services/language_detection/setup.py index 6d4c8f7d..d956699a 100644 --- a/services/language_detection/setup.py +++ b/services/language_detection/setup.py @@ -1,7 +1,6 @@ import os from setuptools import setup, find_packages -from setuptools.command.develop import develop from setuptools.command.install import install diff --git a/services/spam_detection/MANIFEST.in b/services/spam_detection/MANIFEST.in new file mode 100644 index 00000000..d5726c2a --- /dev/null +++ b/services/spam_detection/MANIFEST.in @@ -0,0 +1,5 @@ +include README.md +include LICENSE +include NOTICE +include MANIFEST.in 
+include requirements.txt \ No newline at end of file diff --git a/services/spam_detection/README.md b/services/spam_detection/README.md new file mode 100644 index 00000000..8e8176e5 --- /dev/null +++ b/services/spam_detection/README.md @@ -0,0 +1,59 @@ +# CoffeeHouse SpamDetection + +Library for detecting spam by classifying input as spam/ham + + +## Installation + +Install the following packages using the corresponding setup and makefile +operations provided by the repo, or use CoffeeHouse-Server's install script +to install all the required components + + - Hyper-Internal-Service + - CoffeeHouse-NLPFR + - CoffeeHouse-DLTC + - CoffeeHouseMod-Tokenizer + - CoffeeHouseMod-StopWords + - CoffeeHouseMod-APT + +Finally, install CoffeeHouse-SpamDetection by running `python3 setup.py install` + + +# Build Model + +You can update the model build by adding new data to .dat files located in +`model/spam_ham/` then proceed to build the model by running `./build_model`. +This process will produce a directory called `spam_ham_build` which you should +copy over to `coffeehouse_spamdetection/` and replace the already existing +files. This process is resource intensive so make sure you are running +this operation on supported chipsets that were manufactured after 2014. + + +## Example Usage +```py +from coffeehouse_spamdetection.main import SpamDetection + +spam_detection = SpamDetection() +spam_detection.predict("Test") +# {'ham': 0.998092, 'spam': 0.0017609089} +``` + + +## Start as server +```shell script +python3 -m coffeehouse_spamdetection --start-server +``` + +This process will run using port `5601` and only accepts POST requests +with the parameter `input` as plain text. 
You should receive a JSON
+response that looks like this
+
+```json
+{
+    "status": true,
+    "results": {
+        "ham": "0.998092",
+        "spam": "0.0017609089"
+    }
+}
+```
\ No newline at end of file
diff --git a/services/spam_detection/coffeehouse_spamdetection/__init__.py b/services/spam_detection/coffeehouse_spamdetection/__init__.py
new file mode 100644
index 00000000..9fde1e9d
--- /dev/null
+++ b/services/spam_detection/coffeehouse_spamdetection/__init__.py
@@ -0,0 +1,7 @@
+from . import main
+from .main import *
+
+from . import server
+from .server import *
+
+__all__ = ["main", "SpamDetection", "Server"]
\ No newline at end of file
diff --git a/services/spam_detection/coffeehouse_spamdetection/__main__.py b/services/spam_detection/coffeehouse_spamdetection/__main__.py
new file mode 100644
index 00000000..68d4aff5
--- /dev/null
+++ b/services/spam_detection/coffeehouse_spamdetection/__main__.py
@@ -0,0 +1,70 @@
+import sys
+
+from coffeehouse_spamdetection import SpamDetection
+from coffeehouse_spamdetection import Server
+
+
+def _real_main(argv=None):
+    """
+    The main command-line processor, runs the handler for the option in
+    argv[1] and shows the help menu when no option was given
+
+    :param argv: Argument list, program name first; None uses sys.argv
+    :return: None
+    """
+    argv = sys.argv if argv is None else argv
+    option = argv[1] if len(argv) > 1 else '--help'
+    handlers = {'--help': _help_menu, '--test': _test_model, '--start-server': _start_server}
+    if option in handlers:
+        handlers[option](argv)
+
+
+def _start_server(argv=None):
+    """
+    Starts the server
+
+    :param argv: Unused
+    :return: None
+    """
+    server = Server()
+    server.start()
+
+
+def _help_menu(argv=None):
+    """
+    Displays the help menu and commandline usage, then exits the process
+
+    :param argv: Unused
+    :return: Does not return, calls sys.exit()
+    """
+    print(
+        "CoffeeHouse SpamDetection CLI\n\n"
+        " --help\n"
+        " --test\n"
+        " --start-server\n"
+    )
+    sys.exit()
+
+
+def _test_model(argv=None):
+    """
+    Tests the model's prediction by allowing user input and displaying the
+    prediction output
+
+    :param argv: Unused
+    :return: Does not return, prompts until interrupted
+    """
+    print("Loading")
+    spam_detection = SpamDetection()
+    print("Ready\n")
+
+    while True:
+        input_text = input("> ")
+        print(spam_detection.predict(input_text))
+
+
+if
__name__ == '__main__':
+    try:
+        _real_main(sys.argv)
+    except KeyboardInterrupt:
+        print('\nInterrupted by user')
diff --git a/services/spam_detection/coffeehouse_spamdetection/main.py b/services/spam_detection/coffeehouse_spamdetection/main.py
new file mode 100644
index 00000000..24c18f86
--- /dev/null
+++ b/services/spam_detection/coffeehouse_spamdetection/main.py
@@ -0,0 +1,27 @@
+import os
+from resource_fetch import ResourceFetch
+from coffeehouse_dltc.main import DLTC
+
+__all__ = ['SpamDetection']
+
+
+class SpamDetection(object):
+
+    def __init__(self):
+        """
+        Public Constructor, loads the model cluster from the fetched resource
+        """
+        self.dltc = DLTC()
+        self.rf = ResourceFetch()
+        self.model_directory = os.path.join(self.rf.fetch("Intellivoid", "CoffeeHouseData-Spam"), 'spam_ham_build')
+        self.dltc.load_model_cluster(self.model_directory)
+
+    def predict(self, text_input):
+        """
+        Takes the user input and predicts if the input is either
+        spam or ham
+
+        :param text_input: Text handed to the model for classification
+        :return: Returns dictionary "ham", "spam" prediction values
+        """
+        return self.dltc.predict_from_text(text_input)
diff --git a/services/spam_detection/coffeehouse_spamdetection/server.py b/services/spam_detection/coffeehouse_spamdetection/server.py
new file mode 100644
index 00000000..8f9b8e29
--- /dev/null
+++ b/services/spam_detection/coffeehouse_spamdetection/server.py
@@ -0,0 +1,55 @@
+from hyper_internal_service import web
+
+from coffeehouse_spamdetection import SpamDetection
+
+__all__ = ['Server']
+
+
+class Server(object):
+
+    def __init__(self, port=5601):
+        """
+        Public Constructor
+        :param port: Port passed to web.run_app when the server is started
+        """
+        self.port = port
+        self.web_application = web.Application()
+        self.web_application.add_routes([web.post('/', self.predict)])
+        self.spam_detection = SpamDetection()
+
+    async def predict(self, request):
+        """
+        Handles the predict request "/", usage:
+        POST:: "input": str
+        :param request: The incoming POST request
+        :return: JSON response, HTTP 400 with "status": false if "input" is missing
+        """
+        post_data = await request.post()
+        if 'input' not in post_data:
+            return web.json_response({"status": False, "error": "missing input"}, status=400)
+        results = self.spam_detection.predict(post_data['input'])
+        response =
{
+            "status": True,
+            "results": {
+                "ham": str(results['ham']),
+                "spam": str(results['spam'])
+            }
+        }
+        return web.json_response(response)
+
+    def start(self):
+        """
+        Starts the web application on the configured port
+        :return: True once web.run_app returns
+        """
+        web.run_app(app=self.web_application, port=self.port)
+        return True
+
+    def stop(self):
+        """
+        Stops the web application
+        :return: True; NOTE(review): confirm shutdown()/cleanup() need no await
+        """
+        self.web_application.shutdown()
+        self.web_application.cleanup()
+        return True
\ No newline at end of file
diff --git a/services/spam_detection/requirements.txt b/services/spam_detection/requirements.txt
new file mode 100644
index 00000000..99e1f969
--- /dev/null
+++ b/services/spam_detection/requirements.txt
@@ -0,0 +1,2 @@
+coffeehouse_dltc
+hyper_internal_service
\ No newline at end of file
diff --git a/services/spam_detection/setup.py b/services/spam_detection/setup.py
new file mode 100644
index 00000000..6bb338ca
--- /dev/null
+++ b/services/spam_detection/setup.py
@@ -0,0 +1,31 @@
+from setuptools import setup, find_packages
+from setuptools.command.install import install
+
+
+class PostInstallCommand(install):
+    """Post-installation for installation mode."""
+
+    def run(self):
+        install.run(self)
+        from resource_fetch import ResourceFetch
+        rf = ResourceFetch()
+
+        # Fetch the model data once the regular install step has finished
+        rf.fetch("Intellivoid", "CoffeeHouseData-Spam")
+
+
+setup(
+    name='coffeehouse_spamdetection',
+    version='1.0.0',
+    description='Predicts input to be either spam or ham',
+    url='https://github.com/Intellivoid/CoffeeHouse-SpamDetection',
+    author='Zi Xing Narrakas',
+    author_email='netkas@intellivoid.info',
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Topic :: Text Processing',
+        'Programming Language :: Python :: 3',
+    ],
+    cmdclass={'install': PostInstallCommand},
+    packages=find_packages()
+)