Source code for konlp.tokenize.api

# Copyright (C) 2017 - 0000 KoNLTK project
# Korean Natural Language Toolkit:
# Author: HyunYoung Lee <>
#         GyuHyeon Nam <>
#         Seungshik Kang <>
# URL: <>
# For license information, see LICENSE.TXT
# ============================================================
"""Korean Natural Language Toolkit tokenizer interface"""

from abc import ABCMeta, abstractmethod
from six import add_metaclass

@add_metaclass(ABCMeta)
class TokenizerI(object):
    """Tokenizer interface.

    Declared with the ``ABCMeta`` metaclass (through ``six.add_metaclass``
    so the same source works on Python 2 and 3). Concrete tokenizers
    subclass this and override :meth:`tokenize`.
    """

    @abstractmethod
    def tokenize(self, string):
        """Return a tokenized copy of string.

        Args:
            string (str): String to tokenize

        Returns:
            list(str): Tokenized tokens

        Raises:
            NotImplementedError: If this base implementation is invoked
                instead of an override supplied by a subclass
        """
        raise NotImplementedError()
class SimpleTokenizer(TokenizerI):
    """Example showing how to implement the ``TokenizerI`` interface."""

    def tokenize(self, string):
        """Tokenize a string on white-space characters.

        Args:
            string (str): String to tokenize

        Returns:
            list(str): Tokens produced by ``string.split()``; consecutive
                white-space characters act as a single separator, so an
                empty or all-white-space string yields an empty list
        """
        return string.split()