Tokenize Comparison
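
Both snippets rely on helpers defined in the benchmark's setup and not shown on this page: clean, QUERY_DELIMITER_PATTERN, OPERATORS, and KEYWORDS. The sketch below gives one plausible set of definitions so the code can run on its own; the keyword strings, the delimiter pattern, and the precedence values are assumptions for illustration, not the page's actual setup.

// Hypothetical setup sketch; the real definitions are not part of this page
export const KEYWORDS = { and: 'and', or: 'or', not: 'not', open: '(', close: ')' } as const

// Assumed precedence table; brackets sit at the bottom so a close bracket flushes back to its matching open bracket
export const OPERATORS: Record<string, { precedence: number }> = {
    [KEYWORDS.not]:   { precedence: 3 },
    [KEYWORDS.and]:   { precedence: 2 },
    [KEYWORDS.or]:    { precedence: 1 },
    [KEYWORDS.open]:  { precedence: 0 },
    [KEYWORDS.close]: { precedence: 0 },
}

// Assumed delimiter: split on whitespace and keep brackets as their own tokens (via the capture group)
export const QUERY_DELIMITER_PATTERN = /([()])|\s+/

// Assumed normalisation step
export function clean(txt: string): string {
    return txt.toLowerCase().trim()
}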

Old Tokenize
export function old_tokenize(txt: string, postfix: boolean = true): Array<string> {
    txt = clean(txt)
    let infix_tokens: Array<string> = txt.split(QUERY_DELIMITER_PATTERN).map(tkn => tkn.trim()).filter(Boolean)
    let i: number = 0
    
    while (i < infix_tokens.length - 1) {
        // AND Operator Auto-Insert
        // Insertion Cases (Inserts Between Two Tokens):
        // 1) Operand-Operand
        // 2) Operand-Open-Bracket
        // 3) Operand-Unary-Operator (Operand-Not)
        // 4) Close-Bracket-Operand
        // 5) Close-Bracket-Open-Bracket
        // 6) Close-Bracket-Unary-Operator (Close-Bracket-Not)
        // This feature lets users omit 'and' in the search bar (similar to booru search syntax)

        let current_token: string = infix_tokens[i]
        let next_token: string = infix_tokens[i + 1]

        if ((!(current_token in OPERATORS) && !(next_token in OPERATORS)) ||
            (!(current_token in OPERATORS) && next_token === KEYWORDS.open) ||
            (!(current_token in OPERATORS) && next_token === KEYWORDS.not) ||
            (current_token === KEYWORDS.close && !(next_token in OPERATORS)) ||
            (current_token === KEYWORDS.close && next_token === KEYWORDS.open) ||
            (current_token === KEYWORDS.close && next_token === KEYWORDS.not)) 
            {
            // Tokens that are not operators are assumed to be operands
            infix_tokens.splice(i + 1, 0, KEYWORDS.and)
        }
        i++
    }
    if (!postfix) return infix_tokens

    // Shunting Yard Algorithm (Infix to Postfix Notation)
    infix_tokens.reverse() // Reversed so .pop() can be used instead of .shift()
    let held_tokens: Array<string> = []
    let postfix_tokens: Array<string> = []

    while (infix_tokens.length > 0) {
        let tkn: string = infix_tokens.pop() as string

        // Operands
        if (!(tkn in OPERATORS)) {
            postfix_tokens.push(tkn)
            continue
        }
        // Operators
        if (held_tokens.length === 0) {
            // Base case
            held_tokens.push(tkn)
            continue
        } else if ((tkn === KEYWORDS.open) || 
                   (tkn === KEYWORDS.not && held_tokens.at(-1) === KEYWORDS.not) ||
                   (OPERATORS[tkn].precedence > OPERATORS[held_tokens.at(-1)!].precedence)) {
            held_tokens.push(tkn)
            continue
        } 
        // Flushing Held Tokens
        while (held_tokens.length > 0) {
            if (OPERATORS[tkn].precedence > OPERATORS[held_tokens.at(-1)!].precedence) {
                break
            } else if (held_tokens.at(-1)! === KEYWORDS.open) {
                // Open brackets are not kept
                held_tokens.pop()
                break
            }
            postfix_tokens.push(held_tokens.pop()!)
        }
        if (tkn !== KEYWORDS.close) {
            // Close brackets are not kept
            held_tokens.push(tkn)
        }
    }
    // Flushing Last Remaining Held Tokens
    while (held_tokens.length > 0) {
        postfix_tokens.push(held_tokens.pop()!)
    }
    return postfix_tokens
}
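
For reference, here is what the old implementation produces for a small query under the setup sketched at the top of the page (the exact output depends on those assumed keywords and precedences):

// Assuming the hypothetical setup sketched above
old_tokenize('cat (dog or not bird)', false)
// → ['cat', 'and', '(', 'dog', 'or', 'not', 'bird', ')']   ('and' auto-inserted between 'cat' and '(')
old_tokenize('cat (dog or not bird)')
// → ['cat', 'dog', 'bird', 'not', 'or', 'and']             (postfix / reverse Polish form)
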
New Tokenize
export function tokenize(txt: string = '', postfix: boolean = true): Array<string> {
    txt = clean(txt)
    // Reversed so .pop() can be used instead of .shift(). Filtering with /\s/.test() skips the extra .map(tkn => tkn.trim()) pass
    let reverse_infix_tokens: Array<string> = txt.split(QUERY_DELIMITER_PATTERN).filter(tkn => !(/\s/.test(tkn))).filter(Boolean).toReversed()
    let infix_tokens: Array<string> = []
    
    while (reverse_infix_tokens.length > 0) {
        // AND Operator Auto-Insert
        // Insertion Cases (Inserts Between Two Tokens):
        // 1) Operand-Operand
        // 2) Operand-Open-Bracket
        // 3) Operand-Unary-Operator (Operand-Not)
        // 4) Close-Bracket-Operand
        // 5) Close-Bracket-Open-Bracket
        // 6) Close-Bracket-Unary-Operator (Close-Bracket-Not)
        // This feature lets users omit 'and' in the search bar (similar to booru search syntax)
        infix_tokens.push(reverse_infix_tokens.pop()!) // .pop() moves tokens instead of copying them, so no duplicate array is kept (better space complexity)

        if (infix_tokens.length < 2) continue // Prevents accessing out of bounds index

        let second_last_tkn: string = infix_tokens.at(-2)!
        let last_tkn: string = infix_tokens.at(-1)!
        
        if ((!(second_last_tkn in OPERATORS) && !(last_tkn in OPERATORS)) ||
            (!(second_last_tkn in OPERATORS) && last_tkn === KEYWORDS.open) ||
            (!(second_last_tkn in OPERATORS) && last_tkn === KEYWORDS.not) ||
            (second_last_tkn === KEYWORDS.close && !(last_tkn in OPERATORS)) ||
            (second_last_tkn === KEYWORDS.close && last_tkn === KEYWORDS.open) ||
            (second_last_tkn === KEYWORDS.close && last_tkn === KEYWORDS.not)) 
            {
            // Tokens that are not operators are assumed to be operands
            infix_tokens.splice(-1, 0, KEYWORDS.and) // Inserting just before the last element shifts only one item, so this is effectively O(1)
        }
    }
    if (!postfix) return infix_tokens

    // Shunting Yard Algorithm (Infix to Postfix Notation)
    infix_tokens.reverse() // Reversed so .pop() can be used instead of .shift()
    let held_tokens: Array<string> = []
    let postfix_tokens: Array<string> = []

    while (infix_tokens.length > 0) {
        let tkn: string = infix_tokens.pop()!

        // Operands
        if (!(tkn in OPERATORS)) {
            postfix_tokens.push(tkn)
            continue
        }
        // Operators
        if (held_tokens.length === 0) {
            // Base case
            held_tokens.push(tkn)
            continue
        } else if ((tkn === KEYWORDS.open) || 
                   (tkn === KEYWORDS.not && held_tokens.at(-1) === KEYWORDS.not) ||
                   (OPERATORS[tkn].precedence > OPERATORS[held_tokens.at(-1)!].precedence)) {
            held_tokens.push(tkn)
            continue
        } 
        // Flushing Held Tokens
        while (held_tokens.length > 0) {
            if (OPERATORS[tkn].precedence > OPERATORS[held_tokens.at(-1)!].precedence) {
                break
            } else if (held_tokens.at(-1)! === KEYWORDS.open) {
                // Open brackets are not kept
                held_tokens.pop()
                break
            }
            postfix_tokens.push(held_tokens.pop()!)
        }
        if (tkn !== KEYWORDS.close) {
            // Close brackets are not kept
            held_tokens.push(tkn)
        }
    }
    // Flushing Last Remaining Held Tokens
    while (held_tokens.length > 0) {
        postfix_tokens.push(held_tokens.pop()!)
    }
    return postfix_tokens
}
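
The page's own test runner output isn't captured here; below is a rough sketch of how the two versions could be timed locally. The sample query and iteration count are arbitrary choices, not part of the original benchmark.

// Minimal local comparison harness (sketch; not the page's actual test runner)
const SAMPLE_QUERY = 'cat (dog or not bird) fish'   // arbitrary sample input
const ITERATIONS = 100_000

function opsPerSec(fn: (q: string) => Array<string>): number {
    const start = performance.now()
    for (let i = 0; i < ITERATIONS; i++) fn(SAMPLE_QUERY)
    return ITERATIONS / ((performance.now() - start) / 1000)
}

// Sanity check: both versions should tokenize identically before timing them
console.assert(JSON.stringify(old_tokenize(SAMPLE_QUERY)) === JSON.stringify(tokenize(SAMPLE_QUERY)))

console.log('old_tokenize:', opsPerSec(q => old_tokenize(q)).toFixed(0), 'ops/sec')
console.log('tokenize:    ', opsPerSec(q => tokenize(q)).toFixed(0), 'ops/sec')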