/*
 * Decompiled with CFR 0.152.
 */
package org.pageseeder.diffx.load.text;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.pageseeder.diffx.config.WhiteSpaceProcessing;
import org.pageseeder.diffx.load.text.TextTokenizer;
import org.pageseeder.diffx.token.TextToken;
import org.pageseeder.diffx.token.impl.IgnorableSpaceToken;
import org.pageseeder.diffx.token.impl.SpaceToken;
import org.pageseeder.diffx.token.impl.WordToken;

/**
 * Tokenizer that splits text into word tokens separated by runs of whitespace.
 *
 * <p>A "word" is any maximal run of characters that is not matched by the
 * regular expression {@code \s+}; each whitespace run becomes a single space
 * token unless the configured processing mode is
 * {@link WhiteSpaceProcessing#IGNORE}, in which case whitespace is dropped.
 *
 * <p>Tokens are cached per instance (keyed by their text) so that repeated
 * words and whitespace runs are represented by the same token object.
 * The cache is a plain {@link HashMap} with no synchronization, so an instance
 * should not be shared between threads without external synchronization.
 */
public final class TokenizerByWord
implements TextTokenizer {
    /**
     * Matches one or more consecutive whitespace characters.
     * Compiled once: {@link Pattern} instances are immutable and safe to share.
     */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Per-instance cache of tokens already produced, keyed by their text. */
    private final Map<String, TextToken> recycling = new HashMap<String, TextToken>();

    /** How whitespace runs are turned into tokens (or skipped). */
    private final WhiteSpaceProcessing whitespace;

    /**
     * Creates a tokenizer using the specified whitespace processing mode.
     *
     * @param whitespace the whitespace processing mode; must not be {@code null}
     * @throws NullPointerException if {@code whitespace} is {@code null}
     */
    public TokenizerByWord(WhiteSpaceProcessing whitespace) {
        if (whitespace == null) {
            throw new NullPointerException("the white space processing must be specified.");
        }
        this.whitespace = whitespace;
    }

    /**
     * Splits the text into word tokens and (unless whitespace is ignored)
     * space tokens, in the order they occur in the text.
     *
     * @param text the character sequence to tokenize; must not be {@code null}
     * @return the tokens in document order; an immutable empty list when
     *         {@code text} is empty
     * @throws NullPointerException if {@code text} is {@code null}
     */
    @Override
    public List<TextToken> tokenize(CharSequence text) {
        if (text == null) {
            throw new NullPointerException("Character sequence is null");
        }
        if (text.length() == 0) {
            return Collections.emptyList();
        }
        // Every token spans at least one character, so text.length() is an
        // upper bound on the number of tokens and the list never has to grow.
        List<TextToken> tokens = new ArrayList<TextToken>(text.length());
        Matcher m = WHITESPACE.matcher(text);
        // Position just after the previous whitespace run (start of the next word).
        int index = 0;
        while (m.find()) {
            // Non-empty gap between the previous whitespace run and this one is a word.
            if (index != m.start()) {
                String word = text.subSequence(index, m.start()).toString();
                tokens.add(this.getWordEvent(word));
            }
            if (this.whitespace != WhiteSpaceProcessing.IGNORE) {
                tokens.add(this.getSpaceEvent(m.group()));
            }
            index = m.end();
        }
        // Trailing word after the last whitespace run (or the whole text if none matched).
        if (index != text.length()) {
            String word = text.subSequence(index, text.length()).toString();
            tokens.add(this.getWordEvent(word));
        }
        return tokens;
    }

    /**
     * Convenience method that tokenizes a sequence with a freshly created
     * tokenizer, so no tokens are recycled across separate calls.
     *
     * @param seq        the character sequence to tokenize; must not be {@code null}
     * @param whitespace the whitespace processing mode; must not be {@code null}
     * @return the tokens in document order
     * @throws NullPointerException if either argument is {@code null}
     */
    public static List<TextToken> tokenize(CharSequence seq, WhiteSpaceProcessing whitespace) {
        TokenizerByWord tokenizer = new TokenizerByWord(whitespace);
        return tokenizer.tokenize(seq);
    }

    /**
     * Returns the cached token for the given word, creating and caching a new
     * {@link WordToken} on first use.
     *
     * @param word the word text
     * @return the token representing {@code word}
     */
    private TextToken getWordEvent(String word) {
        TextToken token = this.recycling.get(word);
        if (token == null) {
            token = new WordToken(word);
            this.recycling.put(word, token);
        }
        return token;
    }

    /**
     * Returns the cached token for the given whitespace run, creating and
     * caching one on first use. When the mode is
     * {@link WhiteSpaceProcessing#PRESERVE} an {@link IgnorableSpaceToken} is
     * created; otherwise the token comes from {@link SpaceToken#getInstance}.
     * This method is not called by {@link #tokenize(CharSequence)} when the
     * mode is {@link WhiteSpaceProcessing#IGNORE}.
     *
     * @param space the whitespace text
     * @return the token representing {@code space}
     */
    private TextToken getSpaceEvent(String space) {
        TextToken token = this.recycling.get(space);
        if (token == null) {
            token = this.whitespace == WhiteSpaceProcessing.PRESERVE ? new IgnorableSpaceToken(space) : SpaceToken.getInstance(space);
            this.recycling.put(space, token);
        }
        return token;
    }
}

