LeetCode Drawing - Day 3

395. Longest Substring with At Least K Repeating Characters

Take aways

Algorithm

  • Devise a way to calculate the character frequency in s[i, j]
  • Use prefixSum arrays for each character in the alphabet

Coding

  • Good: Defined predicate functions for readability
  • Good: Described what freqSum(prefixSum, i, j) means as comment
  • Good: Stop digging into impossible substrings
  • Should have done: ensure to update j and freqsum at the end of each while loop
alphabet = "abcdefghijklmnopqrstuvwxyz"

def freqSum(prefixSum, i, j):
    # Returns freqSum of string s[i:j]
    # Ex
    #  012345
    # "aaabba"
    # p[0] = {}
    # p[1] = {a:1}
    # p[4] = {a:3,b:1}
    # freqSum(p, 1, 4) = {a:2, b:1}
    ans = {}
    for alphabetChar in alphabet:
        if i < j:
            ans[alphabetChar] = prefixSum[alphabetChar][j] - prefixSum[alphabetChar][i]
        else:
            ans[alphabetChar] = 0
    return ans

def isPossible(freqsum, k):
    for alphabetChar in alphabet:
        if freqsum[alphabetChar] >= k:
            return True
    return False

def isValid(freqsum, k):
    for alphabetChar in alphabet:
        if 0 < freqsum[alphabetChar] and freqsum[alphabetChar] < k:
            return False
    return True

class Solution:    
    def longestSubstring(self, s: str, k: int) -> int:
        # O(n^2) Solution
        # prefixSum['a'][i] = m ... freq of a in s[0:i]
        # Note: prefixSum['a'][0] = 0 ... same for 'a'..'z'
        prefixSum = {}
        for alphabetChar in alphabet:
            prefixSum[alphabetChar] = [0]
        for c in s:
            for alphabetChar in alphabet:
                prevValue = prefixSum[alphabetChar][-1]
                newValue = prevValue + 1 if c == alphabetChar else prevValue
                prefixSum[alphabetChar].append(newValue)
        maxLen = 0
        for i in range(len(s)):
            j = len(s)
            freqsum = freqSum(prefixSum, i, j)
            while isPossible(freqsum, k):
                if isValid(freqsum, k):
                    maxLen = max(maxLen, j-i)
                    break
                j -= 1
                freqsum = freqSum(prefixSum, i, j)
        return maxLen

Update: O(N) solution

Found the O(N) solution in the discussion and I implemented the stategy in Python 3.

from collections import defaultdict
from enum import Enum

class SubstringState(Enum):
    InsufficientlyUnique = 1
    ExcessivelyUnique = 2
    SomeCharLessThanK = 3
    AllAtLeastK = 4

def getSubstringState(freqDict: defaultdict, k: int, m: int) -> SubstringState:
    kinds = 0
    allAtLeastK = True
    for v in freqDict.values():
        if v > 0:
            kinds += 1
            if v < k:
                allAtLeastK = False
    if kinds < m:
        return SubstringState.InsufficientlyUnique
    elif kinds > m:
        return SubstringState.ExcessivelyUnique
    elif allAtLeastK:
        return SubstringState.AllAtLeastK
    else:
        return SubstringState.SomeCharLessThanK

class Solution:
    def longestSubstring(self, s: str, k: int) -> int:
        # Strategy
        # Use the sliding window technique to solve the problem in O(N).
        # However, we cannot apply it to the original problem naively
        # because we will not know when to move the left edge.
        # So, we will lower the degree of freedom of the problem to know when
        # to move the left edge of the window.
        # This way, we can apply the sliding window technique to the constrained problem
        # and solve the original problem in O(N).
        # More specifically, our easier problem statement is:
        #   What is the length of the longest substring of s
        #   which consists of m unique chars and all of them appear at least k times?
        uniqueSet = set(s)
        maxLen = 0
        for m in range(1, len(uniqueSet)+1):
            # Check for such strings with m unique characters
            left = 0
            right = 0
            freqDict = defaultdict(int)
            while right <= len(s):
                # Check if s[left:right] satisfies the condition
                # Note that freqDict is synced up with s[left:right]
                # at the beginning of this loop
                state = getSubstringState(freqDict, k, m)
                if state == SubstringState.InsufficientlyUnique:
                    # Push the right edge to get more chars in the substring
                    if right < len(s):
                        addingChar = s[right]
                        freqDict[addingChar] += 1
                    right += 1
                elif state == SubstringState.ExcessivelyUnique:
                    # Push the left edge to reduce chars in the substring
                    removingChar = s[left]
                    freqDict[removingChar] -= 1
                    left += 1
                elif state == SubstringState.AllAtLeastK:
                    # Update the max and push the right edge to get more chars in the substring
                    maxLen = max(maxLen, right-left)
                    if right < len(s):
                        addingChar = s[right]
                        freqDict[addingChar] += 1
                    right += 1
                else:
                    # state is SubstringState.SomeCharLessThanK
                    # Push the right edge to get more chars in the substring
                    if right < len(s):
                        addingChar = s[right]
                        freqDict[addingChar] += 1
                    right += 1
        return maxLen
Loading...
Level up your coding skills and quickly land a job. This is the best place to expand your knowledge and get prepared for your next interview.