package de.duehl.basics.text;

/*
 * Copyright 2016 Christian Dühl. All rights reserved.
 *
 * This program is free software. You can redistribute it and/or
 * modify it under the same terms as perl:
 *
 * general:  http://dev.perl.org/licenses/
 * GPL:      http://dev.perl.org/licenses/gpl1.html
 * artistic: http://dev.perl.org/licenses/artistic.html
 */

import java.util.ArrayList;
import java.util.List;

/**
 * Offers a single static method that cuts a string into consecutive parts: words, runs of
 * punctuation, runs of braces and runs of whitespace.
 *
 * Which characters count as whitespace, punctuation or braces is decided by the helper
 * methods of {@code Text}; everything else is treated as part of a word.
 *
 * @version 1.01     2016-07-07
 * @author Christian Dühl
 */

public class SentenceSplitter {

    /**
     * Category of a character. START only marks that no character has been read yet; it is
     * never assigned to an actual character.
     */
    private enum CharacterState {
        START, IN_WORD, WHITESPACE, PUNCTUATION, BRACES
    }

    /**
     * Splits the text into parts, starting a new part whenever the category of the current
     * character differs from the category of the character before it. Concatenating all
     * returned parts in order yields the original text.
     *
     * For an empty text the result is a list containing exactly one empty string.
     *
     * @param text
     *            Text to split, must not be null.
     * @return List of the parts in the order they appear in the text.
     */
    public static List<String> splitWords(String text) {
        List<String> parts = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        CharacterState previous = CharacterState.START;

        for (char c : text.toCharArray()) {
            CharacterState category = classify(c);

            // A category change closes the running part; the very first character has
            // nothing before it to close.
            boolean categoryChanged = category != previous;
            if (categoryChanged && previous != CharacterState.START) {
                parts.add(current.toString());
                current.setLength(0);
            }

            current.append(c);
            previous = category;
        }

        // The last running part (or the empty buffer for an empty text) is always emitted.
        parts.add(current.toString());
        return parts;
    }

    /**
     * Determines the category of a single character. The checks are made in the order
     * whitespace, punctuation, brace; a character matching none of them belongs to a word.
     */
    private static CharacterState classify(char c) {
        if (Text.isWhitespace(c)) {
            return CharacterState.WHITESPACE;
        }
        if (Text.isPunctuation(c)) {
            return CharacterState.PUNCTUATION;
        }
        if (Text.isBrace(c)) {
            return CharacterState.BRACES;
        }
        return CharacterState.IN_WORD;
    }

}
