Added SpamDetection
This commit is contained in:
parent
70b00a9f34
commit
a19b9401e5
15
Makefile
15
Makefile
|
@ -25,6 +25,9 @@ clean_his:
|
||||||
clean_langdetect:
|
clean_langdetect:
|
||||||
rm -rf services/language_detection/build services/language_detection/dist services/language_detection/coffeehouse_languagedetection.egg-info
|
rm -rf services/language_detection/build services/language_detection/dist services/language_detection/coffeehouse_languagedetection.egg-info
|
||||||
|
|
||||||
|
clean_spamdetect:
|
||||||
|
rm -rf services/spam_detection/build services/spam_detection/dist services/spam_detection/coffeehouse_spamdetection.egg-info
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
make clean_apt clean_stopwords clean_tokenizer clean_nlpfr
|
make clean_apt clean_stopwords clean_tokenizer clean_nlpfr
|
||||||
make clean_dltc
|
make clean_dltc
|
||||||
|
@ -32,6 +35,7 @@ clean:
|
||||||
make clean_alg
|
make clean_alg
|
||||||
make clean_rf
|
make clean_rf
|
||||||
make clean_langdetect
|
make clean_langdetect
|
||||||
|
make clean_spamdetect
|
||||||
|
|
||||||
# ======================================================================================================================
|
# ======================================================================================================================
|
||||||
|
|
||||||
|
@ -66,6 +70,9 @@ build_rf:
|
||||||
build_langdetect:
|
build_langdetect:
|
||||||
cd services/language_detection; python3 setup.py build; python3 setup.py sdist
|
cd services/language_detection; python3 setup.py build; python3 setup.py sdist
|
||||||
|
|
||||||
|
build_spamdetect:
|
||||||
|
cd services/spam_detection; python3 setup.py build; python3 setup.py sdist
|
||||||
|
|
||||||
build:
|
build:
|
||||||
make build_nlpfr
|
make build_nlpfr
|
||||||
make build_his
|
make build_his
|
||||||
|
@ -73,6 +80,7 @@ build:
|
||||||
make build_alg
|
make build_alg
|
||||||
make build_rf
|
make build_rf
|
||||||
make build_langdetect
|
make build_langdetect
|
||||||
|
make build_spamdetect
|
||||||
|
|
||||||
# ======================================================================================================================
|
# ======================================================================================================================
|
||||||
|
|
||||||
|
@ -108,6 +116,9 @@ install_rf:
|
||||||
install_langdetect:
|
install_langdetect:
|
||||||
cd services/language_detection; python3 setup.py install
|
cd services/language_detection; python3 setup.py install
|
||||||
|
|
||||||
|
install_spamdetect:
|
||||||
|
cd services/spam_detection; python3 setup.py install
|
||||||
|
|
||||||
install:
|
install:
|
||||||
make install_rf
|
make install_rf
|
||||||
make install_nlpfr
|
make install_nlpfr
|
||||||
|
@ -115,6 +126,7 @@ install:
|
||||||
make install_dltc
|
make install_dltc
|
||||||
make install_alg
|
make install_alg
|
||||||
make install_langdetect
|
make install_langdetect
|
||||||
|
make install_spamdetect
|
||||||
|
|
||||||
# ======================================================================================================================
|
# ======================================================================================================================
|
||||||
|
|
||||||
|
@ -134,3 +146,6 @@ system_prep_gcc:
|
||||||
|
|
||||||
start_langdetect:
|
start_langdetect:
|
||||||
python3 -m coffeehouse_languagedetection --start-server
|
python3 -m coffeehouse_languagedetection --start-server
|
||||||
|
|
||||||
|
start_spamdetect:
|
||||||
|
python3 -m coffeehouse_spamdetection --start-server
|
|
@ -1,7 +1,6 @@
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup, find_packages
|
||||||
from setuptools.command.develop import develop
|
|
||||||
from setuptools.command.install import install
|
from setuptools.command.install import install
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
include README.md
|
||||||
|
include LICENSE
|
||||||
|
include NOTICE
|
||||||
|
include MANIFEST.in
|
||||||
|
include requirements.txt
|
|
@ -0,0 +1,59 @@
|
||||||
|
# CoffeeHouse SpamDetection
|
||||||
|
|
||||||
|
Library for detecting spam by classifying input as spam/ham
|
||||||
|
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
Install the following packages using the corresponding setup and makefile
|
||||||
|
operations provided by the repo, or use CoffeeHouse-Server's install script
|
||||||
|
to install all the required components
|
||||||
|
|
||||||
|
- Hyper-Internal-Service
|
||||||
|
- CoffeeHouse-NLPFR
|
||||||
|
- CoffeeHouse-DLTC
|
||||||
|
- CoffeeHouseMod-Tokenizer
|
||||||
|
- CoffeeHouseMod-StopWords
|
||||||
|
- CoffeeHouseMod-APT
|
||||||
|
|
||||||
|
Finally, install CoffeeHouse-SpamDetection by running `python3 setup.py install`
|
||||||
|
|
||||||
|
|
||||||
|
## Build Model
|
||||||
|
|
||||||
|
You can update the model build by adding new data to .dat files located in
|
||||||
|
`model/spam_ham/` then proceed to build the model by running `./build_model`.
|
||||||
|
This process will produce a directory called `spam_ham_build` which you should
|
||||||
|
copy over to `coffeehouse_spamdetection/` and replace the already existing
|
||||||
|
files. This process is resource intensive so make sure you are running
|
||||||
|
this operation on supported chipsets that were manufactured after 2014.
|
||||||
|
|
||||||
|
|
||||||
|
## Example Usage
|
||||||
|
```py
|
||||||
|
from coffeehouse_spamdetection.main import SpamDetection
|
||||||
|
|
||||||
|
spam_detection = SpamDetection()
|
||||||
|
spam_detection.predict("Test")
|
||||||
|
# {'ham': 0.998092, 'spam': 0.0017609089}
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
## Start as server
|
||||||
|
```shell script
|
||||||
|
python3 -m coffeehouse_spamdetection --start-server
|
||||||
|
```
|
||||||
|
|
||||||
|
This process will run using port `5601` and only accepts POST requests
|
||||||
|
with the parameter `input` as plain text. You should receive a JSON
|
||||||
|
response that looks like this
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"status": true,
|
||||||
|
"results": {
|
||||||
|
"ham": "0.998092",
|
||||||
|
"spam": "0.0017609089"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
|
@ -0,0 +1,7 @@
|
||||||
|
from . import main
|
||||||
|
from .main import *
|
||||||
|
|
||||||
|
from . import server
|
||||||
|
from .server import *
|
||||||
|
|
||||||
|
# Public names of the package: both imported submodules plus the classes
# re-exported from them.
__all__ = ["main", "server", "SpamDetection", "Server"]
|
|
@ -0,0 +1,70 @@
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from coffeehouse_spamdetection import SpamDetection
|
||||||
|
from coffeehouse_spamdetection import Server
|
||||||
|
|
||||||
|
|
||||||
|
def _real_main(argv=None):
    """
    The main command-line processor

    Dispatches on the first command-line option. When no option is given,
    or the option is not recognised, the help menu is displayed instead of
    failing with an IndexError or silently doing nothing.

    :param argv: Full argument vector with the program name first; falls
        back to ``sys.argv`` when omitted
    :return: None
    """
    if argv is None:
        argv = sys.argv

    # argv[0] is the program name, so the option (if any) lives at index 1
    option = argv[1] if len(argv) > 1 else '--help'

    if option == '--test':
        _test_model(argv)
    elif option == '--start-server':
        _start_server(argv)
    else:
        # '--help', a missing option and unknown options all end up here
        _help_menu(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def _start_server(argv=None):
    """
    Creates a Server with its default settings and starts it

    :param argv: Command-line arguments (not used by this function)
    :return: None
    """
    Server().start()
|
||||||
|
|
||||||
|
|
||||||
|
def _help_menu(argv=None):
    """
    Displays the help menu and commandline usage, then exits the process

    :param argv: Command-line arguments (not used by this function)
    :return: Does not return; sys.exit() raises SystemExit
    """
    print(
        "CoffeeHouse SpamDetection CLI\n\n"
        " --help\n"
        " --test\n"
        # Must match the option name that _real_main actually dispatches on
        " --start-server\n"
    )
    sys.exit()
|
||||||
|
|
||||||
|
|
||||||
|
def _test_model(argv=None):
    """
    Interactive check of the model: reads lines from the user and prints
    the prediction for each one

    The loop has no exit condition of its own; it only ends when an
    exception (for example KeyboardInterrupt) propagates out of it.

    :param argv: Command-line arguments (not used by this function)
    :return: None
    """
    print("Loading")
    detector = SpamDetection()
    print("Ready\n")

    while True:
        print(detector.predict(input("> ")))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: hand the raw argument vector to the command-line
    # processor and turn Ctrl+C into a short message instead of a traceback.
    try:
        _real_main(argv=sys.argv)
    except KeyboardInterrupt:
        print('\nInterrupted by user')
|
|
@ -0,0 +1,27 @@
|
||||||
|
import os
|
||||||
|
from resource_fetch import ResourceFetch
|
||||||
|
from coffeehouse_dltc.main import DLTC
|
||||||
|
|
||||||
|
__all__ = ['SpamDetection']
|
||||||
|
|
||||||
|
|
||||||
|
class SpamDetection(object):
    """
    Classifies text as spam or ham using a DLTC model cluster
    """

    def __init__(self):
        """
        Public Constructor

        Creates the DLTC instance, resolves the "CoffeeHouseData-Spam"
        resource through ResourceFetch and loads the model cluster found
        in its "spam_ham_build" sub-directory
        """
        self.dltc = DLTC()
        self.rf = ResourceFetch()

        resource_root = self.rf.fetch("Intellivoid", "CoffeeHouseData-Spam")
        self.model_directory = os.path.join(resource_root, 'spam_ham_build')

        self.dltc.load_model_cluster(self.model_directory)

    def predict(self, text_input):
        """
        Takes the user input and predicts if the input is either
        spam or ham

        :param text_input: Text to classify
        :return: Returns dictionary "ham", "spam" prediction values
        """
        prediction = self.dltc.predict_from_text(text_input)
        return prediction
|
|
@ -0,0 +1,55 @@
|
||||||
|
from hyper_internal_service import web
|
||||||
|
|
||||||
|
from coffeehouse_spamdetection import SpamDetection
|
||||||
|
|
||||||
|
__all__ = ['Server']
|
||||||
|
|
||||||
|
|
||||||
|
class Server(object):
    """
    Web front-end that serves SpamDetection predictions on POST "/"
    """

    def __init__(self, port=5601):
        """
        Public Constructor

        Builds the web application, registers the prediction route and
        creates the SpamDetection instance used by the handler

        :param port: Port passed to web.run_app() when the server starts
        """
        self.port = port
        self.web_application = web.Application()
        self.web_application.add_routes(
            [web.post('/', self.predict)]
        )
        self.spam_detection = SpamDetection()

    async def predict(self, request):
        """
        Handles the predict request "/", usage:
         POST:: "input": str

        :param request: Incoming request; its POST data must contain "input"
        :return: JSON response with "status" and "results" ("ham"/"spam"
            values as strings), or a status 400 JSON error when "input"
            is missing
        """
        post_data = await request.post()

        # "input" is client-supplied; without this guard a request lacking
        # the field raises KeyError inside the handler.
        if 'input' not in post_data:
            return web.json_response(
                {
                    "status": False,
                    "message": "Missing required parameter 'input'"
                },
                status=400
            )

        results = self.spam_detection.predict(post_data['input'])
        response = {
            "status": True,
            "results": {
                # Values are stringified before serialisation, as before
                "ham": str(results['ham']),
                "spam": str(results['spam'])
            }
        }
        return web.json_response(response)

    def start(self):
        """
        Starts the web application

        :return: True once web.run_app() has returned
        """
        web.run_app(app=self.web_application, port=self.port)
        return True

    def stop(self):
        """
        Stops the web application

        :return: True
        """
        # NOTE(review): in aiohttp-style APIs Application.shutdown() and
        # Application.cleanup() are coroutines; if that also holds for
        # hyper_internal_service these calls are never awaited - confirm.
        self.web_application.shutdown()
        self.web_application.cleanup()
        return True
|
|
@ -0,0 +1,2 @@
|
||||||
|
coffeehouse_dltc
|
||||||
|
hyper_internal_service
|
|
@ -0,0 +1,31 @@
|
||||||
|
from setuptools import setup, find_packages
|
||||||
|
from setuptools.command.install import install
|
||||||
|
|
||||||
|
|
||||||
|
class PostInstallCommand(install):
    """Install command that also fetches the spam model data afterwards."""

    def run(self):
        """Run the regular install step, then fetch "CoffeeHouseData-Spam"."""
        install.run(self)

        # Kept as a function-level import, exactly where the original had it;
        # presumably so setup.py can be loaded before resource_fetch is
        # available - TODO confirm.
        from resource_fetch import ResourceFetch

        # Update the model
        ResourceFetch().fetch("Intellivoid", "CoffeeHouseData-Spam")
|
||||||
|
# Package metadata for coffeehouse_spamdetection. The stock "install" command
# is replaced by PostInstallCommand, so installing also runs its fetch step.
setup(
    name='coffeehouse_spamdetection',
    version='1.0.0',
    description='Predicts input to be either spam or ham',
    url='https://github.com/Intellivoid/CoffeeHouse-SpamDetection',
    author='Zi Xing Narrakas',
    author_email='netkas@intellivoid.info',
    classifiers=[
        # NOTE(review): the standard trove classifier is
        # 'Development Status :: 3 - Alpha'; 'Internal/Alpha' looks custom -
        # confirm whether that is intentional.
        'Development Status :: 3 - Internal/Alpha',
        'Topic :: Text Processing',
        'Programming Language :: Python :: 3',
    ],
    cmdclass={
        'install': PostInstallCommand,
    },
    packages=find_packages()
)
|
Loading…
Reference in New Issue