package opennlp.tools.tokenize;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Pattern;
import opennlp.tools.ml.model.Event;
import opennlp.tools.tokenize.lang.Factory;
import opennlp.tools.util.AbstractEventStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;

/* loaded from: classes2.dex */
/**
 * Converts {@link TokenSample}s into tokenizer training {@link Event}s.
 *
 * <p>For every sample, the text between the start of the first annotated token and the end of
 * the last annotated token is split by {@link WhitespaceTokenizer} into candidate spans. For each
 * candidate that is not skipped, every annotated token contained in the candidate yields
 * {@link TokenizerME#NO_SPLIT} events for its interior offsets and a {@link TokenizerME#SPLIT}
 * event at its end offset (unless that end coincides with the end of the candidate).
 */
public class TokSpanEventStream extends AbstractEventStream<TokenSample> {
    /** Pattern used to recognise candidates that are skipped when {@link #skipAlphaNumerics} is set. */
    private final Pattern alphaNumeric;
    /** Produces the context features for a given offset inside a candidate string. */
    private final TokenContextGenerator cg;
    /** When {@code true}, candidates fully matching {@link #alphaNumeric} generate no events. */
    private final boolean skipAlphaNumerics;

    /**
     * Creates a stream that uses a {@link DefaultTokenContextGenerator}.
     *
     * @param objectStream the samples to convert into events
     * @param z whether candidates fully matching the alphanumeric pattern are skipped
     */
    public TokSpanEventStream(ObjectStream<TokenSample> objectStream, boolean z) {
        this(objectStream, z, new DefaultTokenContextGenerator());
    }

    /**
     * Creates a stream with an explicit alphanumeric pattern and context generator.
     *
     * @param objectStream the samples to convert into events
     * @param z whether candidates fully matching {@code pattern} are skipped
     * @param pattern the pattern a candidate must fully match to be skipped
     * @param tokenContextGenerator generator of the context features for each event
     */
    public TokSpanEventStream(ObjectStream<TokenSample> objectStream, boolean z, Pattern pattern, TokenContextGenerator tokenContextGenerator) {
        super(objectStream);
        this.alphaNumeric = pattern;
        this.skipAlphaNumerics = z;
        this.cg = tokenContextGenerator;
    }

    /**
     * Creates a stream whose alphanumeric pattern is obtained from
     * {@code new Factory().getAlphanumeric(null)}.
     *
     * @param objectStream the samples to convert into events
     * @param z whether candidates fully matching the alphanumeric pattern are skipped
     * @param tokenContextGenerator generator of the context features for each event
     */
    public TokSpanEventStream(ObjectStream<TokenSample> objectStream, boolean z, TokenContextGenerator tokenContextGenerator) {
        this(objectStream, z, new Factory().getAlphanumeric(null), tokenContextGenerator);
    }

    /**
     * Builds the training events for a single sample.
     *
     * <p>Candidates of length one or less are skipped, as are candidates that fully match the
     * alphanumeric pattern when skipping is enabled. Annotated tokens that overlap a candidate
     * without being contained in it are reported on {@code System.out} and otherwise ignored.
     *
     * @param tokenSample the sample providing the text and the annotated token spans
     * @return an iterator over the generated events; empty when the sample has no token spans
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // opennlp.tools.util.AbstractEventStream
    public Iterator<Event> createEvents(TokenSample tokenSample) {
        ArrayList<Event> events = new ArrayList<Event>(50);
        Span[] tokens = tokenSample.getTokenSpans();
        String text = tokenSample.getText();
        if (tokens.length == 0) {
            return events.iterator();
        }

        // Candidate spans are relative to this substring; 'offset' maps them back onto 'text'.
        int offset = tokens[0].getStart();
        String sentence = text.substring(offset, tokens[tokens.length - 1].getEnd());

        int firstTrainingToken = -1;
        int lastTrainingToken = -1;
        for (Span relative : WhitespaceTokenizer.INSTANCE.tokenizePos(sentence)) {
            String candidateText = sentence.substring(relative.getStart(), relative.getEnd());
            if (candidateText.length() <= 1
                    || (this.skipAlphaNumerics && this.alphaNumeric.matcher(candidateText).matches())) {
                continue;
            }
            Span candidate = new Span(relative.getStart() + offset, relative.getEnd() + offset);

            // Locate the annotated tokens lying inside this candidate, resuming after the last
            // token consumed by a previous candidate. The index advances exactly once per token,
            // including after a warning, so the token following a malformed one is still examined.
            boolean foundTrainingTokens = false;
            for (int ti = lastTrainingToken + 1; ti < tokens.length; ti++) {
                Span token = tokens[ti];
                if (candidate.contains(token)) {
                    if (!foundTrainingTokens) {
                        firstTrainingToken = ti;
                        foundTrainingTokens = true;
                    }
                    lastTrainingToken = ti;
                } else if (candidate.getEnd() < token.getEnd()) {
                    // Token ends past this candidate: leave it for a later candidate.
                    break;
                } else if (token.getEnd() >= candidate.getStart()) {
                    // Token overlaps the candidate without being contained in it.
                    System.out.println("Bad training token: " + token + " cand: " + candidate
                            + " token=" + text.substring(token.getStart(), token.getEnd()));
                }
                // Otherwise the token ends before the candidate starts: keep looking.
            }

            if (foundTrainingTokens) {
                addEvents(events, tokens, firstTrainingToken, lastTrainingToken, candidate, candidateText);
            }
        }
        return events.iterator();
    }

    /**
     * Appends the events for the annotated tokens {@code first..last} (inclusive) found inside
     * {@code candidate}: NO_SPLIT for each interior offset of a token, SPLIT at a token's end
     * unless that end is also the end of the candidate.
     */
    private void addEvents(ArrayList<Event> events, Span[] tokens, int first, int last, Span candidate, String candidateText) {
        int candidateStart = candidate.getStart();
        for (int ti = first; ti <= last; ti++) {
            Span token = tokens[ti];
            for (int pos = token.getStart() + 1; pos < token.getEnd(); pos++) {
                events.add(new Event(TokenizerME.NO_SPLIT, this.cg.getContext(candidateText, pos - candidateStart)));
            }
            if (token.getEnd() != candidate.getEnd()) {
                events.add(new Event(TokenizerME.SPLIT, this.cg.getContext(candidateText, token.getEnd() - candidateStart)));
            }
        }
    }
}
