Added SpamDetection

2020-12-27 23:08:53 -05:00 · 2020-12-27 23:08:53 -05:00 · a19b9401e5
parent 70b00a9f34
commit a19b9401e5
10 changed files with 272 additions and 2 deletions
--- a/17
+++ b/17
@ -25,6 +25,9 @@ clean_his:
 clean_langdetect:
 	rm -rf services/language_detection/build services/language_detection/dist services/language_detection/coffeehouse_languagedetection.egg-info

+# Remove the build, dist and egg-info artifacts of the spam_detection service
+clean_spamdetect:
+	rm -rf services/spam_detection/build services/spam_detection/dist services/spam_detection/coffeehouse_spamdetection.egg-info
+
 clean:
 	make clean_apt clean_stopwords clean_tokenizer clean_nlpfr
 	make clean_dltc
@ -32,6 +35,7 @@ clean:
 	make clean_alg
 	make clean_rf
 	make clean_langdetect
+	make clean_spamdetect

 # ======================================================================================================================

@ -66,6 +70,9 @@ build_rf:
 build_langdetect:
 	cd services/language_detection; python3 setup.py build; python3 setup.py sdist

+# Build the spam_detection package and its source distribution.
+# The commands are chained with && so that a failed cd or build step stops
+# the recipe instead of letting the following command run anyway.
+build_spamdetect:
+	cd services/spam_detection && python3 setup.py build && python3 setup.py sdist
+
 build:
 	make build_nlpfr
 	make build_his
@ -73,6 +80,7 @@ build:
 	make build_alg
 	make build_rf
 	make build_langdetect
+	make build_spamdetect

 # ======================================================================================================================

@ -108,6 +116,9 @@ install_rf:
 install_langdetect:
 	cd services/language_detection; python3 setup.py install

+# Install the spam_detection package.
+# && ensures setup.py is only run when the cd into the service directory
+# succeeded, so it can never execute from the wrong directory.
+install_spamdetect:
+	cd services/spam_detection && python3 setup.py install
+
 install:
 	make install_rf
 	make install_nlpfr
@ -115,6 +126,7 @@ install:
 	make install_dltc
 	make install_alg
 	make install_langdetect
+	make install_spamdetect

 # ======================================================================================================================

@ -133,4 +145,7 @@ system_prep_gcc:
 # ======================================================================================================================

 start_langdetect:
-	python3 -m coffeehouse_languagedetection --start-server
+	python3 -m coffeehouse_languagedetection --start-server
+
+# Run the installed coffeehouse_spamdetection package as a server
+start_spamdetect:
+	python3 -m coffeehouse_spamdetection --start-server
--- a/services/language_detection/setup.py
+++ b/services/language_detection/setup.py
@ -1,7 +1,6 @@
 import os

 from setuptools import setup, find_packages
-from setuptools.command.develop import develop
 from setuptools.command.install import install


--- a/services/spam_detection/MANIFEST.in
+++ b/services/spam_detection/MANIFEST.in
@ -0,0 +1,5 @@
+include README.md
+include LICENSE
+include NOTICE
+include MANIFEST.in
+include requirements.txt
--- a/services/spam_detection/README.md
+++ b/services/spam_detection/README.md
@ -0,0 +1,59 @@
+# CoffeeHouse SpamDetection
+
+Library for detecting spam by classifying input as spam/ham
+
+
+## Installation
+
+Install the following packages using the corresponding setup and makefile
+operations provided by the repo, or use CoffeeHouse-Server's install script
+to install all the required components
+
+ - Hyper-Internal-Service
+ - CoffeeHouse-NLPFR
+ - CoffeeHouse-DLTC 
+ - CoffeeHouseMod-Tokenizer
+ - CoffeeHouseMod-StopWords
+ - CoffeeHouseMod-APT
+ 
+Finally, install CoffeeHouse-SpamDetection by running `python3 setup.py install`
+
+
+# Build Model
+
+You can update the model build by adding new data to .dat files located in 
+`model/spam_ham/` then proceed to build the model by running `./build_model`.
+This process will produce a directory called `spam_ham_build` which you should
+copy over to `coffeehouse_spamdetection/` and replace the already existing
+files. This process is resource intensive so make sure you are running
+this operation on supported chipsets that were manufactured after 2014.
+
+
+## Example Usage
+```py
+from coffeehouse_spamdetection.main import SpamDetection
+
+spam_detection = SpamDetection()
+spam_detection.predict("Test")
+# {'ham': 0.998092, 'spam': 0.0017609089}
+```
+
+
+## Start as server
+```shell script
+python3 -m coffeehouse_spamdetection --start-server
+```
+
+This process will run using port `5601` and only accepts POST requests
+with the parameter `input` as plain text. You should receive a JSON
+response that looks like this
+
+```json
+{
+  "status": true,
+  "results": {
+    "ham": "0.998092",
+    "spam": "0.0017609089"
+  }
+}
+```
--- a/services/spam_detection/coffeehouse_spamdetection/init.py
+++ b/services/spam_detection/coffeehouse_spamdetection/init.py
@ -0,0 +1,7 @@
+# NOTE: import order matters here. server.py imports SpamDetection from this
+# package, so .main (which presumably is the module defining SpamDetection)
+# has to be imported before .server is loaded.
+from . import main
+from .main import *
+
+from . import server
+from .server import *
+
+# Names exported by `from coffeehouse_spamdetection import *`
+__all__ = ["main", "SpamDetection", "Server"]
--- a/services/spam_detection/coffeehouse_spamdetection/main.py
+++ b/services/spam_detection/coffeehouse_spamdetection/main.py
@ -0,0 +1,70 @@
+import sys
+
+from coffeehouse_spamdetection import SpamDetection
+from coffeehouse_spamdetection import Server
+
+
+def _real_main(argv=None):
+    """
+    The main command-line processor
+
+    :param argv:
+    :return:
+    """
+    if argv[1] == '--help':
+        _help_menu(argv)
+    if argv[1] == '--test':
+        _test_model(argv)
+    if argv[1] == '--start-server':
+        _start_server(argv)
+
+
+def _start_server(argv=None):
+    """
+    Starts the server
+
+    :param argv:
+    :return:
+    """
+    server = Server()
+    server.start()
+
+
+def _help_menu(argv=None):
+    """
+    Displays the help menu and commandline usage
+
+    :param argv:
+    :return:
+    """
+    print(
+        "CoffeeHouse SpamDetection CLI\n\n"
+        "   --help\n"
+        "   --test\n"
+        "   --start-servver  rrr\n"
+    )
+    sys.exit()
+
+
+def _test_model(argv=None):
+    """
+    Tests the model's prediction by allowing user input and displaying the
+    prediction output
+
+    :param argv:
+    :return:
+    """
+    print("Loading")
+    spam_detection = SpamDetection()
+    print("Ready\n")
+
+    while True:
+        input_text = input("> ")
+        print(spam_detection.predict(input_text))
+
+
+# Script entry point: run the CLI and report Ctrl+C with a short message
+# instead of a traceback
+if __name__ == '__main__':
+    try:
+        _real_main(sys.argv)
+    except KeyboardInterrupt:
+        print('\nInterrupted by user')
--- a/services/spam_detection/coffeehouse_spamdetection/main.py
+++ b/services/spam_detection/coffeehouse_spamdetection/main.py
@ -0,0 +1,27 @@
+import os
+from resource_fetch import ResourceFetch
+from coffeehouse_dltc.main import DLTC
+
+__all__ = ['SpamDetection']
+
+
+class SpamDetection(object):
+
+    def __init__(self):
+        """
+        Public Constructor
+        """
+        self.dltc = DLTC()
+        self.rf = ResourceFetch()
+        self.model_directory = os.path.join(self.rf.fetch("Intellivoid", "CoffeeHouseData-Spam"), 'spam_ham_build')
+        self.dltc.load_model_cluster(self.model_directory)
+
+    def predict(self, text_input):
+        """
+        Takes the user input and predicts if the input is either
+        spam or ham
+
+        :param text_input:
+        :return: Returns dictionary "ham", "spam" prediction values
+        """
+        return self.dltc.predict_from_text(text_input)
--- a/services/spam_detection/coffeehouse_spamdetection/server.py
+++ b/services/spam_detection/coffeehouse_spamdetection/server.py
@ -0,0 +1,55 @@
+from hyper_internal_service import web
+
+from coffeehouse_spamdetection import SpamDetection
+
+__all__ = ['Server']
+
+
+class Server(object):
+
+    def __init__(self, port=5601):
+        """
+        Public Constructor
+        :param port:
+        """
+        self.port = port
+        self.web_application = web.Application()
+        self.web_application.add_routes(
+            [web.post('/', self.predict)]
+        )
+        self.spam_detection = SpamDetection()
+
+    async def predict(self, request):
+        """
+        Handles the predict request "/", usage:
+        POST:: "input": str
+        :param request:
+        :return:
+        """
+        post_data = await request.post()
+        results = self.spam_detection.predict(post_data['input'])
+        response = {
+            "status": True,
+            "results": {
+                "ham": str(results['ham']),
+                "spam": str(results['spam'])
+            }
+        }
+        return web.json_response(response)
+
+    def start(self):
+        """
+        Starts the web application
+        :return:
+        """
+        web.run_app(app=self.web_application, port=self.port)
+        return True
+
+    def stop(self):
+        """
+        Stops the web application
+        :return:
+        """
+        self.web_application.shutdown()
+        self.web_application.cleanup()
+        return True
--- a/services/spam_detection/requirements.txt
+++ b/services/spam_detection/requirements.txt
@ -0,0 +1,2 @@
+coffeehouse_dltc
+hyper_internal_service
--- a/services/spam_detection/setup.py
+++ b/services/spam_detection/setup.py
@ -0,0 +1,31 @@
+from setuptools import setup, find_packages
+from setuptools.command.install import install
+
+
+class PostInstallCommand(install):
+    """Post-installation for installation mode."""
+
+    def run(self):
+        install.run(self)
+        from resource_fetch import ResourceFetch
+        rf = ResourceFetch()
+
+        # Update the model
+        rf.fetch("Intellivoid", "CoffeeHouseData-Spam")
+setup(
+    name='coffeehouse_spamdetection',
+    version='1.0.0',
+    description='Predicts input to be either spam or ham',
+    url='https://github.com/Intellivoid/CoffeeHouse-SpamDetection',
+    author='Zi Xing Narrakas',
+    author_email='netkas@intellivoid.info',
+    classifiers=[
+        'Development Status :: 3 - Internal/Alpha',
+        'Topic :: Text Processing',
+        'Programming Language :: Python :: 3',
+    ],
+    cmdclass={
+        'install': PostInstallCommand,
+    },
+    packages=find_packages()
+)