Added SpamDetection

This commit is contained in:
netkas 2020-12-27 23:08:53 -05:00
parent 70b00a9f34
commit a19b9401e5
10 changed files with 272 additions and 2 deletions

View File

@ -25,6 +25,9 @@ clean_his:
# Remove the build/, dist/ and egg-info directories of the language_detection service
clean_langdetect:
rm -rf services/language_detection/build services/language_detection/dist services/language_detection/coffeehouse_languagedetection.egg-info
# Remove the build/, dist/ and egg-info directories of the spam_detection service
clean_spamdetect:
rm -rf services/spam_detection/build services/spam_detection/dist services/spam_detection/coffeehouse_spamdetection.egg-info
clean:
make clean_apt clean_stopwords clean_tokenizer clean_nlpfr
make clean_dltc
@ -32,6 +35,7 @@ clean:
make clean_alg
make clean_rf
make clean_langdetect
make clean_spamdetect
# ======================================================================================================================
@ -66,6 +70,9 @@ build_rf:
# Build the language_detection package and its source distribution.
# The steps are chained with && so that a failed `cd` or build step aborts
# the recipe instead of letting the next command run anyway.
build_langdetect:
	cd services/language_detection && python3 setup.py build && python3 setup.py sdist
# Build the spam_detection package and its source distribution.
# The steps are chained with && so that a failed `cd` or build step aborts
# the recipe instead of letting the next command run anyway.
build_spamdetect:
	cd services/spam_detection && python3 setup.py build && python3 setup.py sdist
build:
make build_nlpfr
make build_his
@ -73,6 +80,7 @@ build:
make build_alg
make build_rf
make build_langdetect
make build_spamdetect
# ======================================================================================================================
@ -108,6 +116,9 @@ install_rf:
# Install the language_detection package.
# && makes the install run only when the `cd` succeeded, so setup.py is
# never executed from the wrong directory.
install_langdetect:
	cd services/language_detection && python3 setup.py install
# Install the spam_detection package.
# && makes the install run only when the `cd` succeeded, so setup.py is
# never executed from the wrong directory.
install_spamdetect:
	cd services/spam_detection && python3 setup.py install
install:
make install_rf
make install_nlpfr
@ -115,6 +126,7 @@ install:
make install_dltc
make install_alg
make install_langdetect
make install_spamdetect
# ======================================================================================================================
@ -133,4 +145,7 @@ system_prep_gcc:
# ======================================================================================================================
start_langdetect:
python3 -m coffeehouse_languagedetection --start-server
python3 -m coffeehouse_languagedetection --start-server
# Run the installed coffeehouse_spamdetection module with its --start-server option
start_spamdetect:
python3 -m coffeehouse_spamdetection --start-server

View File

@ -1,7 +1,6 @@
import os
from setuptools import setup, find_packages
from setuptools.command.develop import develop
from setuptools.command.install import install

View File

@ -0,0 +1,5 @@
include README.md
include LICENSE
include NOTICE
include MANIFEST.in
include requirements.txt

View File

@ -0,0 +1,59 @@
# CoffeeHouse SpamDetection
Library for detecting spam by classifying input as spam/ham
## Installation
Install the following packages using the corresponding setup and makefile
operations provided by the repo, or use CoffeeHouse-Server's install script
to install all the required components
- Hyper-Internal-Service
- CoffeeHouse-NLPFR
- CoffeeHouse-DLTC
- CoffeeHouseMod-Tokenizer
- CoffeeHouseMod-StopWords
- CoffeeHouseMod-APT
Finally, install CoffeeHouse-SpamDetection by running `python3 setup.py install`
## Build Model
You can update the model build by adding new data to .dat files located in
`model/spam_ham/` then proceed to build the model by running `./build_model`.
This process will produce a directory called `spam_ham_build` which you should
copy over to `coffeehouse_spamdetection/` and replace the already existing
files. This process is resource intensive so make sure you are running
this operation on supported chipsets that were manufactured after 2014.
## Example Usage
```py
from coffeehouse_spamdetection.main import SpamDetection
spam_detection = SpamDetection()
spam_detection.predict("Test")
# {'ham': 0.998092, 'spam': 0.0017609089}
```
## Start as server
```shell script
python3 -m coffeehouse_spamdetection --start-server
```
This process will run using port `5601` and only accepts POST requests
with the parameter `input` as plain text. You should receive a JSON
response that looks like this
```json
{
"status": true,
"results": {
"ham": "0.998092",
"spam": "0.0017609089"
}
}
```

View File

@ -0,0 +1,7 @@
from . import main
from .main import *
from . import server
from .server import *
__all__ = ["main", "SpamDetection", "Server"]

View File

@ -0,0 +1,70 @@
import sys
from coffeehouse_spamdetection import SpamDetection
from coffeehouse_spamdetection import Server
def _real_main(argv=None):
    """
    The main command-line processor

    Dispatches on the first option (argv[1]): `--test` runs the interactive
    model test and `--start-server` starts the server. `--help`, a missing
    option, or an unrecognized option all show the help menu, so the CLI
    never fails with an IndexError or silently does nothing.

    :param argv: Full argument vector with the program name at index 0;
                 falls back to sys.argv when None
    :return: None
    """
    if argv is None:
        argv = sys.argv

    # No option supplied: show the usage text instead of indexing past the end
    if len(argv) < 2:
        _help_menu(argv)
        return

    option = argv[1]
    if option == '--test':
        _test_model(argv)
    elif option == '--start-server':
        _start_server(argv)
    else:
        # Covers '--help' as well as any unknown option
        _help_menu(argv)
def _start_server(argv=None):
    """
    Creates a Server with its default settings and starts it

    :param argv: Command-line arguments; accepted but not read here
    :return: None
    """
    Server().start()
def _help_menu(argv=None):
    """
    Displays the help menu and commandline usage, then exits the process

    :param argv: Command-line arguments; accepted but not read here
    :return: Does not return; sys.exit() raises SystemExit
    """
    # The option names listed here must match the ones the command-line
    # processor actually compares against ('--start-server', not a typo'd form)
    print(
        "CoffeeHouse SpamDetection CLI\n\n"
        " --help\n"
        " --test\n"
        " --start-server\n"
    )
    sys.exit()
def _test_model(argv=None):
    """
    Tests the model's prediction by allowing user input and displaying the
    prediction output

    Loops until stdin is closed (EOF) or the user interrupts the process.

    :param argv: Command-line arguments; accepted but not read here
    :return: None
    """
    print("Loading")
    spam_detection = SpamDetection()
    print("Ready\n")

    while True:
        try:
            input_text = input("> ")
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input): finish the
            # prompt line and leave the loop instead of dumping a traceback
            print()
            break
        print(spam_detection.predict(input_text))
if __name__ == '__main__':
    # Script entry point: hand the process arguments to the command-line
    # processor and turn Ctrl-C into a short message rather than a traceback
    try:
        _real_main(argv=sys.argv)
    except KeyboardInterrupt:
        print('\nInterrupted by user')

View File

@ -0,0 +1,27 @@
import os
from resource_fetch import ResourceFetch
from coffeehouse_dltc.main import DLTC
__all__ = ['SpamDetection']
class SpamDetection:
    """
    Spam/ham text classifier built on a DLTC model cluster
    """

    def __init__(self):
        """
        Public Constructor

        Fetches the "CoffeeHouseData-Spam" resource of "Intellivoid" and
        loads the `spam_ham_build` directory inside it as the model cluster
        """
        self.dltc = DLTC()
        self.rf = ResourceFetch()

        resource_root = self.rf.fetch("Intellivoid", "CoffeeHouseData-Spam")
        self.model_directory = os.path.join(resource_root, 'spam_ham_build')
        self.dltc.load_model_cluster(self.model_directory)

    def predict(self, text_input):
        """
        Runs the loaded model on the given text to score it as
        spam or ham

        :param text_input: Text to classify
        :return: Returns dictionary "ham", "spam" prediction values
        """
        prediction = self.dltc.predict_from_text(text_input)
        return prediction

View File

@ -0,0 +1,55 @@
from hyper_internal_service import web
from coffeehouse_spamdetection import SpamDetection
__all__ = ['Server']
class Server(object):
    """
    HTTP front-end exposing SpamDetection predictions through a single
    POST route at "/"
    """

    def __init__(self, port=5601):
        """
        Public Constructor

        Registers the POST "/" route and loads the spam detection model.

        :param port: Port the web application listens on once started
        """
        self.port = port
        self.web_application = web.Application()
        self.web_application.add_routes(
            [web.post('/', self.predict)]
        )
        self.spam_detection = SpamDetection()

    async def predict(self, request):
        """
        Handles the predict request "/", usage:
        POST:: "input": str

        :param request: Incoming web request
        :return: JSON response with "status" and "results" ("ham"/"spam"
                 values as strings); a 400 JSON response with
                 "status": False when the "input" parameter is missing
        """
        post_data = await request.post()

        # Reject requests without the required field explicitly; indexing a
        # missing key would raise KeyError and surface as an opaque 500
        if 'input' not in post_data:
            return web.json_response(
                {
                    "status": False,
                    "message": "Missing required POST parameter 'input'"
                },
                status=400
            )

        results = self.spam_detection.predict(post_data['input'])
        response = {
            "status": True,
            "results": {
                # Values are stringified before being placed in the JSON body
                "ham": str(results['ham']),
                "spam": str(results['spam'])
            }
        }
        return web.json_response(response)

    def start(self):
        """
        Starts the web application

        :return: True once web.run_app() has returned
        """
        web.run_app(app=self.web_application, port=self.port)
        return True

    def stop(self):
        """
        Stops the web application

        :return: True
        """
        # NOTE(review): if hyper_internal_service mirrors aiohttp here,
        # shutdown() and cleanup() are coroutines and calling them without
        # awaiting has no effect -- confirm against the library
        self.web_application.shutdown()
        self.web_application.cleanup()
        return True

View File

@ -0,0 +1,2 @@
coffeehouse_dltc
hyper_internal_service

View File

@ -0,0 +1,31 @@
from setuptools import setup, find_packages
from setuptools.command.install import install
class PostInstallCommand(install):
    """Install command that fetches the spam model resource after installing."""

    def run(self):
        """Run the regular install step, then fetch the model resource."""
        install.run(self)

        # Imported here, after the install step has run, rather than at
        # module level
        from resource_fetch import ResourceFetch

        # Update the model
        ResourceFetch().fetch("Intellivoid", "CoffeeHouseData-Spam")
# Package metadata for coffeehouse_spamdetection. The stock `install` command
# is replaced by PostInstallCommand, and every package found by
# find_packages() is included.
setup(
name='coffeehouse_spamdetection',
version='1.0.0',
description='Predicts input to be either spam or ham',
url='https://github.com/Intellivoid/CoffeeHouse-SpamDetection',
author='Zi Xing Narrakas',
author_email='netkas@intellivoid.info',
# NOTE(review): 'Development Status :: 3 - Internal/Alpha' does not look like
# a registered trove classifier ('Development Status :: 3 - Alpha' is) --
# confirm before publishing to an index that validates classifiers
classifiers=[
'Development Status :: 3 - Internal/Alpha',
'Topic :: Text Processing',
'Programming Language :: Python :: 3',
],
# Route `setup.py install` through the post-install hook
cmdclass={
'install': PostInstallCommand,
},
packages=find_packages()
)