NNExperiments/src/util/feature_extraction/emoji.nim

125 lines
3.8 KiB
Nim

# Copyright 2023 Mattia Giambirtone & All Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## Emoji parsing/removal utility
## Code refactored and adapted from https://github.com/thecodedmind/nimoj
import json
import os
import httpclient
import tables
import strutils
import re
import strformat
const
  # Remote JSON document mapping emoji names to emoji characters
  emojiEndpoint = "https://raw.githubusercontent.com/omnidan/node-emoji/master/lib/emoji.json"
  # Local file where the downloaded emoji list is stored
  emojiPath = "assets/emojis.json"
type
  Emojizer* = ref object
    ## Keeps the parsed emoji JSON around so
    ## that the emoji file does not have to be
    ## parsed again on every lookup
    emojiList: JsonNode
proc newEmojizer*: Emojizer =
  ## Creates a fresh Emojizer whose emoji list
  ## starts out as an empty JSON object
  result = Emojizer(emojiList: newJObject())
proc downloadEmojiList =
  ## Downloads the list of emojis from `emojiEndpoint`
  ## and saves it to the JSON file at `emojiPath`.
  ## Errors raised by the HTTP client or by the file
  ## system are propagated to the caller
  let client = newHttpClient()
  # Always release the client's connection, even if the request fails
  defer: client.close()
  let list = client.getContent(emojiEndpoint)
  # writeFile does not create missing directories, so make sure
  # the destination folder exists before writing
  let dir = parentDir(emojiPath)
  if dir.len > 0:
    createDir(dir)
  writeFile(emojiPath, list)
proc getEmojiJson(self: Emojizer): JsonNode =
  ## Returns the JSON object parsed from the emoji file.
  ## On the first call the file is downloaded (if it does
  ## not exist yet) and parsed; the parsed node is then
  ## cached on the Emojizer and returned as-is afterwards
  # An unset or empty node means nothing has been loaded yet.
  # Checking the length is cheap, whereas serializing the whole
  # node only to compare it against "{}" would be paid on every
  # single lookup
  if self.emojiList.isNil or self.emojiList.len == 0:
    if not fileExists(emojiPath):
      downloadEmojiList()
    self.emojiList = parseFile(emojiPath)
  result = self.emojiList
proc findEmoji*(self: Emojizer, part: string): string =
  ## Searches the emoji dict for emoji names containing
  ## `part` (colons in `part` are ignored) and returns the
  ## emoji of the first hit. Returns an empty string when
  ## no name matches
  # Strip the colons once up front instead of compiling and
  # applying a regex again for every entry of the dict
  let needle = part.replace(":", "")
  for name, emoji in self.getEmojiJson().getFields().pairs:
    if needle in name:
      return emoji.getStr()
proc findEmojis*(self: Emojizer, part: string): seq[string] =
  ## Searches the emoji dict for emoji names containing
  ## `part` (colons in `part` are ignored) and returns the
  ## emojis of all hits. The result is empty when no name
  ## matches
  # Strip the colons once up front instead of compiling and
  # applying a regex again for every entry of the dict
  let needle = part.replace(":", "")
  for name, emoji in self.getEmojiJson().getFields().pairs:
    if needle in name:
      result.add(emoji.getStr())
proc findEmojiCodes*(self: Emojizer, part: string): seq[string] =
  ## Similar to findEmojis, but returns the names of the
  ## matching entries (such as happy or santa) instead of
  ## the emoji characters. Colons in `part` are ignored
  # Strip the colons once up front instead of compiling and
  # applying a regex again for every entry of the dict
  let needle = part.replace(":", "")
  for name in self.getEmojiJson().getFields().keys:
    if needle in name:
      result.add(name)
proc getEmoji*(self: Emojizer, emoji: string, default: string = "unknown_emoji"): string =
  ## Looks up the emoji stored under the given name
  ## (colons are removed from the name first). When the
  ## dict has no such entry, `default` is returned
  let name = replace(emoji, re":")
  let entry = self.getEmojiJson(){name}
  result = entry.getStr(default)
proc getEmojiCode*(self: Emojizer, emoji: string): string =
  ## Reverse lookup: returns the name of the first entry
  ## in the emoji dict whose value equals the given emoji
  ## character. Returns an empty string if there is none
  result = ""
  for code, glyph in self.getEmojiJson().getFields().pairs:
    if glyph.getStr() == emoji:
      result = code
      break
proc emojize*(self: Emojizer, msg: string): string =
  ## Searches string for emoji tokens, such as `:santa:`
  ## and swaps in the emoji character for each of them.
  ## NOTE: tokens that are not in the emoji dict are
  ## replaced with the default placeholder of getEmoji
  result = msg
  for token in msg.findAll(re":(.+?):"):
    let e = self.getEmoji(token)
    if e != "":
      # Replace in the accumulated result: starting over from
      # msg would throw away every earlier replacement
      result = result.replace(token, e)
proc demojize*(self: Emojizer, msg: string, strip: bool): string =
  ## Searches string for emoji characters.
  ## If strip is false, every emoji found is swapped
  ## for its token, such as :santa:, otherwise the
  ## emoji is simply removed
  result = msg
  for code, node in self.getEmojiJson().getFields().pairs:
    let emoji = node.getStr()
    # Entries without a string value have nothing to search for
    if emoji.len == 0 or emoji notin result:
      continue
    # When several names share one emoji, the first of them (in
    # iteration order) performs the replacement and the others no
    # longer find it, so `code` is the name a scan of the dict for
    # this emoji would return as well
    let replacement = if strip: "" else: &":{code}:"
    # Replace in the accumulated result: starting over from msg
    # would throw away every earlier replacement
    result = result.replace(emoji, replacement)