Added SpamDetection

2020-12-27 23:08:53 -05:00 · 2020-12-27 23:08:53 -05:00 · a19b9401e5
parent 70b00a9f34
commit a19b9401e5
10 changed files with 272 additions and 2 deletions
--- a/15
+++ b/15
@ -25,6 +25,9 @@ clean_his:
 # Remove the build/, dist/ and egg-info artifacts of the language detection service.
 clean_langdetect:
 	rm -rf services/language_detection/build services/language_detection/dist services/language_detection/coffeehouse_languagedetection.egg-info
 # Remove the build/, dist/ and egg-info artifacts of the spam detection service.
 clean_spamdetect:
 	rm -rf services/spam_detection/build services/spam_detection/dist services/spam_detection/coffeehouse_spamdetection.egg-info
 clean:
 	make clean_apt clean_stopwords clean_tokenizer clean_nlpfr
 	make clean_dltc
@ -32,6 +35,7 @@ clean:
 	make clean_alg
 	make clean_rf
 	make clean_langdetect
 	make clean_spamdetect
 # ======================================================================================================================
@ -66,6 +70,9 @@ build_rf:
 # Build the language detection package and its source distribution.
 # Steps are chained with && so a failed `cd` or build aborts the recipe
 # instead of running setup.py from the wrong directory.
 build_langdetect:
 	cd services/language_detection && python3 setup.py build && python3 setup.py sdist
 # Build the spam detection package and its source distribution.
 # Steps are chained with && so a failed `cd` or build aborts the recipe
 # instead of running setup.py from the wrong directory.
 build_spamdetect:
 	cd services/spam_detection && python3 setup.py build && python3 setup.py sdist
 build:
 	make build_nlpfr
 	make build_his
@ -73,6 +80,7 @@ build:
 	make build_alg
 	make build_rf
 	make build_langdetect
 	make build_spamdetect
 # ======================================================================================================================
@ -108,6 +116,9 @@ install_rf:
 # Install the language detection package.
 # && ensures setup.py only runs when the `cd` succeeded.
 install_langdetect:
 	cd services/language_detection && python3 setup.py install
 # Install the spam detection package.
 # && ensures setup.py only runs when the `cd` succeeded.
 install_spamdetect:
 	cd services/spam_detection && python3 setup.py install
 install:
 	make install_rf
 	make install_nlpfr
@ -115,6 +126,7 @@ install:
 	make install_dltc
 	make install_alg
 	make install_langdetect
 	make install_spamdetect
 # ======================================================================================================================
@ -134,3 +146,6 @@ system_prep_gcc:
 # Run the installed coffeehouse_languagedetection module with --start-server.
 start_langdetect:
 	python3 -m coffeehouse_languagedetection --start-server
 # Run the installed coffeehouse_spamdetection module with --start-server.
 start_spamdetect:
 	python3 -m coffeehouse_spamdetection --start-server
--- a/services/language_detection/setup.py
+++ b/services/language_detection/setup.py
@ -1,7 +1,6 @@
 import os
 from setuptools import setup, find_packages
 from setuptools.command.develop import develop
 from setuptools.command.install import install
--- a/services/spam_detection/MANIFEST.in
+++ b/services/spam_detection/MANIFEST.in
@ -0,0 +1,5 @@
 include README.md
 include LICENSE
 include NOTICE
 include MANIFEST.in
 include requirements.txt
--- a/services/spam_detection/README.md
+++ b/services/spam_detection/README.md
@ -0,0 +1,59 @@
 # CoffeeHouse SpamDetection
 Library for detecting spam by classifying input as spam/ham
 ## Installation
 Install the following packages using the corresponding setup and makefile
 operations provided by the repo, or use CoffeeHouse-Server's install script
 to install all the required components
 - Hyper-Internal-Service
 - CoffeeHouse-NLPFR
 - CoffeeHouse-DLTC 
 - CoffeeHouseMod-Tokenizer
 - CoffeeHouseMod-StopWords
 - CoffeeHouseMod-APT
 Finally, install CoffeeHouse-SpamDetection by running `python3 setup.py install`
 ## Build Model
 You can update the model build by adding new data to .dat files located in 
 `model/spam_ham/` then proceed to build the model by running `./build_model`.
 This process will produce a directory called `spam_ham_build` which you should
 copy over to `coffeehouse_spamdetection/` and replace the already existing
 files. This process is resource intensive so make sure you are running
 this operation on supported chipsets that were manufactured after 2014.
 ## Example Usage
 ```py
 from coffeehouse_spamdetection.main import SpamDetection
 spam_detection = SpamDetection()
 spam_detection.predict("Test")
 # {'ham': 0.998092, 'spam': 0.0017609089}
 ```
 ## Start as server
 ```shell script
 python3 -m coffeehouse_spamdetection --start-server
 ```
 This process will run using port `5601` and only accepts POST requests
 with the parameter `input` as plain text. You should receive a JSON
 response that looks like this
 ```json
 {
  "status": true,
  "results": {
    "ham": "0.998092",
    "spam": "0.0017609089"
  }
 }
 ```
--- a/services/spam_detection/coffeehouse_spamdetection/init.py
+++ b/services/spam_detection/coffeehouse_spamdetection/init.py
@ -0,0 +1,7 @@
 from . import main
 from .main import *
 from . import server
 from .server import *
 __all__ = ["main", "SpamDetection", "Server"]
--- a/services/spam_detection/coffeehouse_spamdetection/main.py
+++ b/services/spam_detection/coffeehouse_spamdetection/main.py
@ -0,0 +1,70 @@
 import sys
 from coffeehouse_spamdetection import SpamDetection
 from coffeehouse_spamdetection import Server
 def _real_main(argv=None):
    """
    The main command-line processor.

    Inspects the first option (``argv[1]``) and calls the matching handler:
    ``--help``, ``--test`` or ``--start-server``. Any other option is ignored.

    :param argv: Full argument vector with the program name at index 0;
        falls back to ``sys.argv`` when omitted.
    :return: None
    """
    if argv is None:
        argv = sys.argv

    # Without an option there is nothing to dispatch on; show the usage text
    # instead of failing with IndexError on argv[1].
    if len(argv) < 2:
        _help_menu(argv)
        return

    option = argv[1]
    if option == '--help':
        _help_menu(argv)
    if option == '--test':
        _test_model(argv)
    if option == '--start-server':
        _start_server(argv)
 def _start_server(argv=None):
    """
    Create a Server with its default settings and start it.

    :param argv: Command-line arguments; accepted but not used here.
    :return: None
    """
    Server().start()
 def _help_menu(argv=None):
    """
    Displays the help menu and command-line usage, then exits.

    :param argv: Command-line arguments; accepted but not used here.
    :return: Does not return; ``sys.exit()`` raises ``SystemExit``.
    """
    # The listed options must match the ones the dispatcher compares against.
    print(
        "CoffeeHouse SpamDetection CLI\n\n"
        "   --help\n"
        "   --test\n"
        "   --start-server\n"
    )
    sys.exit()
 def _test_model(argv=None):
    """
    Tests the model's prediction by reading lines of user input and printing
    the prediction for each one.

    The loop runs until end-of-input is reached; a KeyboardInterrupt is not
    handled here and propagates to the caller.

    :param argv: Command-line arguments; accepted but not used here.
    :return: None
    """
    print("Loading")
    spam_detection = SpamDetection()
    print("Ready\n")
    while True:
        try:
            input_text = input("> ")
        except EOFError:
            # End of input (Ctrl-D or an exhausted pipe): finish the session
            # cleanly instead of crashing with a traceback.
            print()
            break
        print(spam_detection.predict(input_text))
 # Script entry point: hand the raw command-line arguments to the dispatcher.
 if __name__ == '__main__':
    try:
        _real_main(sys.argv)
    except KeyboardInterrupt:
        # Ctrl-C: print a short notice instead of a traceback.
        print('\nInterrupted by user')
--- a/services/spam_detection/coffeehouse_spamdetection/main.py
+++ b/services/spam_detection/coffeehouse_spamdetection/main.py
@ -0,0 +1,27 @@
 import os
 from resource_fetch import ResourceFetch
 from coffeehouse_dltc.main import DLTC
 __all__ = ['SpamDetection']
 class SpamDetection:
    """
    Spam/ham text classifier that delegates predictions to a DLTC instance.
    """

    def __init__(self):
        """
        Fetch the "CoffeeHouseData-Spam" resource and load the model cluster
        found in its ``spam_ham_build`` sub-directory.
        """
        self.dltc = DLTC()
        self.rf = ResourceFetch()
        resource_root = self.rf.fetch("Intellivoid", "CoffeeHouseData-Spam")
        self.model_directory = os.path.join(resource_root, 'spam_ham_build')
        self.dltc.load_model_cluster(self.model_directory)

    def predict(self, text_input):
        """
        Classify the given text as spam or ham.

        :param text_input: Text to classify
        :return: Whatever ``DLTC.predict_from_text`` returns for the text
            (the original docs describe a dictionary with "ham" and "spam"
            prediction values)
        """
        prediction = self.dltc.predict_from_text(text_input)
        return prediction
--- a/services/spam_detection/coffeehouse_spamdetection/server.py
+++ b/services/spam_detection/coffeehouse_spamdetection/server.py
@ -0,0 +1,55 @@
 from hyper_internal_service import web
 from coffeehouse_spamdetection import SpamDetection
 __all__ = ['Server']
 class Server(object):
    """
    Web front-end that exposes SpamDetection predictions through a single
    POST route ("/").
    """

    def __init__(self, port=5601):
        """
        Public Constructor

        Builds the web application, registers the POST "/" route and creates
        the SpamDetection instance used by the handler.

        :param port: Port handed to ``web.run_app`` when the server starts
        """
        self.port = port
        self.web_application = web.Application()
        self.web_application.add_routes(
            [web.post('/', self.predict)]
        )
        self.spam_detection = SpamDetection()

    async def predict(self, request):
        """
        Handles the predict request "/", usage:
        POST:: "input": str

        :param request: Incoming request; its POST data must contain "input"
        :return: JSON response. On success "status" is true and "results"
            holds the "ham" and "spam" values as strings. When "input" is
            missing, "status" is false and the HTTP status is 400.
        """
        post_data = await request.post()

        # Reject requests without the required field explicitly; indexing
        # post_data directly would raise an unhandled KeyError.
        if 'input' not in post_data:
            return web.json_response(
                {
                    "status": False,
                    "message": "Missing required parameter 'input'"
                },
                status=400
            )

        results = self.spam_detection.predict(post_data['input'])
        response = {
            "status": True,
            "results": {
                "ham": str(results['ham']),
                "spam": str(results['spam'])
            }
        }
        return web.json_response(response)

    def start(self):
        """
        Starts the web application by passing it to ``web.run_app`` together
        with the configured port.

        :return: True once ``web.run_app`` has returned
        """
        web.run_app(app=self.web_application, port=self.port)
        return True

    def stop(self):
        """
        Stops the web application by calling its ``shutdown`` and ``cleanup``
        methods.

        :return: True
        """
        # NOTE(review): in aiohttp-style APIs Application.shutdown() and
        # Application.cleanup() are coroutines; if that also holds for
        # hyper_internal_service, these un-awaited calls do nothing - confirm.
        self.web_application.shutdown()
        self.web_application.cleanup()
        return True
--- a/services/spam_detection/requirements.txt
+++ b/services/spam_detection/requirements.txt
@ -0,0 +1,2 @@
 coffeehouse_dltc
 hyper_internal_service
--- a/services/spam_detection/setup.py
+++ b/services/spam_detection/setup.py
@ -0,0 +1,31 @@
 from setuptools import setup, find_packages
 from setuptools.command.install import install
 class PostInstallCommand(install):
    """Install command that fetches the spam model resource after installing."""

    def run(self):
        """Run the regular install step, then fetch "CoffeeHouseData-Spam"."""
        super().run()

        # Imported inside the method, after the install step has run.
        from resource_fetch import ResourceFetch

        # Update the model
        fetcher = ResourceFetch()
        fetcher.fetch("Intellivoid", "CoffeeHouseData-Spam")
 # Package definition for coffeehouse_spamdetection.
 setup(
    # Identity
    name="coffeehouse_spamdetection",
    version="1.0.0",
    description="Predicts input to be either spam or ham",
    # Project and author details
    url="https://github.com/Intellivoid/CoffeeHouse-SpamDetection",
    author="Zi Xing Narrakas",
    author_email="netkas@intellivoid.info",
    classifiers=[
        "Development Status :: 3 - Internal/Alpha",
        "Topic :: Text Processing",
        "Programming Language :: Python :: 3",
    ],
    # Contents: every package discovered next to this file
    packages=find_packages(),
    # Use the custom install command so the model resource is fetched on install
    cmdclass={"install": PostInstallCommand},
 )