Strings

Data Structures & Algorithms

Strings

Strings as Character Arrays

A string is a sequence of characters. Internally, it’s just an array of chars — everything you learned about arrays applies here, plus specialized string algorithms.

Real-world analogy: DNA is a string over alphabet {A, T, G, C}. Bioinformatics is essentially string algorithms applied to DNA — pattern matching, alignment, comparing sequences. The same algorithms used to search for a virus mutation are used in text editors to find and replace.


Part 1 — String Fundamentals in C++

#include <string>

string s = "Hello, World!";

// Size
s.length();     // 13
s.size();       // 13 — identical

// Access
s[0];           // 'H'
s.front();      // 'H'
s.back();       // '!'

// Substring
s.substr(7, 5); // "World" — start at 7, length 5
s.substr(7);    // "World!" — from 7 to end

// Find
s.find("World");          // 7 — first occurrence
s.rfind("l");             // 10 — last occurrence
s.find("xyz");            // string::npos — not found

// Modify
s += " Bye";              // concatenate
s.replace(7, 5, "C++");  // "Hello, C++!"
s.insert(5, ",");
s.erase(5, 1);

// Comparison — lexicographic
"abc" < "abd";   // true
"abc" == "abc";  // true

// Iteration
for (char c : s) cout << c;
for (int i = 0; i < s.size(); i++) cout << s[i];

// Convert
string num = to_string(42);       // "42"
int n = stoi("42");               // 42
double d = stod("3.14");          // 3.14

// Case conversion
transform(s.begin(), s.end(), s.begin(), ::toupper);
transform(s.begin(), s.end(), s.begin(), ::tolower);

Part 2 — String Algorithms

Palindrome Check

A string that reads the same forward and backward.

bool isPalindrome(string s) {
    int left = 0, right = s.size() - 1;
    while (left < right) {
        if (s[left] != s[right]) return false;
        left++; right--;
    }
    return true;
}
// "racecar" → true, "hello" → false
// Time: O(n), Space: O(1)

// Ignore non-alphanumeric, case-insensitive
bool isPalindromeAdvanced(string s) {
    int left = 0, right = s.size() - 1;
    while (left < right) {
        while (left < right && !isalnum(s[left])) left++;
        while (left < right && !isalnum(s[right])) right--;
        if (tolower(s[left]) != tolower(s[right])) return false;
        left++; right--;
    }
    return true;
}
// "A man, a plan, a canal: Panama" → true

Anagram Check

Two strings are anagrams if one is a rearrangement of the other.

// Sort approach — O(n log n)
bool isAnagram(string a, string b) {
    sort(a.begin(), a.end());
    sort(b.begin(), b.end());
    return a == b;
}

// Frequency count — O(n)
bool isAnagram(string a, string b) {
    if (a.size() != b.size()) return false;
    int freq[26] = {};
    for (char c : a) freq[c - 'a']++;
    for (char c : b) {
        freq[c - 'a']--;
        if (freq[c - 'a'] < 0) return false;
    }
    return true;
}
// "listen" and "silent" → true

Reverse Words in a String

string reverseWords(string s) {
    // 1. reverse entire string
    reverse(s.begin(), s.end());

    // 2. reverse each word
    int start = 0;
    for (int i = 0; i <= s.size(); i++) {
        if (i == s.size() || s[i] == ' ') {
            reverse(s.begin() + start, s.begin() + i);
            start = i + 1;
        }
    }
    return s;
}
// "Hello World" → "World Hello"

Part 3 — Pattern Matching

Check every position in text for the pattern.

vector<int> naiveSearch(string text, string pattern) {
    vector<int> result;
    int n = text.size(), m = pattern.size();
    for (int i = 0; i <= n - m; i++) {
        bool match = true;
        for (int j = 0; j < m; j++) {
            if (text[i+j] != pattern[j]) { match = false; break; }
        }
        if (match) result.push_back(i);
    }
    return result;
}
// Time: O(nm)

KMP — Knuth-Morris-Pratt

Avoids re-examining characters by using information from previous matches. The key insight: when a mismatch happens, we already know part of the text — use that to skip comparisons.

Analogy: You’re searching for “ABAB” in text. You’ve matched “ABA” and then mismatch. Instead of going back to the start, you realize “AB” at the end of your match is also the start of the pattern — so you continue from there.

LPS Array (Longest Proper Prefix which is also Suffix):

vector<int> buildLPS(string pattern) {
    int m = pattern.size();
    vector<int> lps(m, 0);
    int len = 0, i = 1;

    while (i < m) {
        if (pattern[i] == pattern[len]) {
            lps[i++] = ++len;
        } else if (len > 0) {
            len = lps[len-1];
        } else {
            lps[i++] = 0;
        }
    }
    return lps;
}
// "AABAAB": lps = {0,1,0,1,2,3}
vector<int> KMP(string text, string pattern) {
    vector<int> result;
    int n = text.size(), m = pattern.size();
    vector<int> lps = buildLPS(pattern);

    int i = 0, j = 0;
    while (i < n) {
        if (text[i] == pattern[j]) { i++; j++; }
        if (j == m) {
            result.push_back(i - j);
            j = lps[j-1];
        } else if (i < n && text[i] != pattern[j]) {
            if (j > 0) j = lps[j-1];
            else i++;
        }
    }
    return result;
}
// Time: O(n + m), Space: O(m)

Rabin-Karp — Rolling Hash

Use hashing to quickly find candidate positions, then verify.

vector<int> RabinKarp(string text, string pattern) {
    vector<int> result;
    int n = text.size(), m = pattern.size();
    long long base = 31, mod = 1e9 + 7;
    long long power = 1;

    for (int i = 0; i < m - 1; i++) power = power * base % mod;

    long long patHash = 0, windowHash = 0;
    for (int i = 0; i < m; i++) {
        patHash = (patHash * base + (pattern[i] - 'a' + 1)) % mod;
        windowHash = (windowHash * base + (text[i] - 'a' + 1)) % mod;
    }

    for (int i = 0; i <= n - m; i++) {
        if (windowHash == patHash)
            if (text.substr(i, m) == pattern)  // verify
                result.push_back(i);

        if (i < n - m) {
            windowHash = (windowHash - (text[i] - 'a' + 1) * power % mod + mod) % mod;
            windowHash = (windowHash * base + (text[i+m] - 'a' + 1)) % mod;
        }
    }
    return result;
}
// Time: O(n + m) average, O(nm) worst (many hash collisions)

Part 4 — Important String Problems

Longest Common Subsequence (LCS)

Find the longest subsequence present in both strings (characters don’t need to be contiguous).

int LCS(string a, string b) {
    int m = a.size(), n = b.size();
    vector<vector<int>> dp(m+1, vector<int>(n+1, 0));

    for (int i = 1; i <= m; i++)
        for (int j = 1; j <= n; j++)
            if (a[i-1] == b[j-1])
                dp[i][j] = dp[i-1][j-1] + 1;
            else
                dp[i][j] = max(dp[i-1][j], dp[i][j-1]);

    return dp[m][n];
}
// "ABCBDAB" and "BDCAB" → 4 ("BCAB" or "BDAB")
// Time: O(mn), Space: O(mn)

Longest Common Substring

Find the longest contiguous substring present in both.

int longestCommonSubstring(string a, string b) {
    int m = a.size(), n = b.size(), maxLen = 0;
    vector<vector<int>> dp(m+1, vector<int>(n+1, 0));

    for (int i = 1; i <= m; i++)
        for (int j = 1; j <= n; j++)
            if (a[i-1] == b[j-1]) {
                dp[i][j] = dp[i-1][j-1] + 1;
                maxLen = max(maxLen, dp[i][j]);
            }

    return maxLen;
}

Longest Palindromic Substring

Expand Around Center approach:

string longestPalindrome(string s) {
    int start = 0, maxLen = 1;

    auto expand = [&](int left, int right) {
        while (left >= 0 && right < s.size() && s[left] == s[right]) {
            if (right - left + 1 > maxLen) {
                maxLen = right - left + 1;
                start = left;
            }
            left--; right++;
        }
    };

    for (int i = 0; i < s.size(); i++) {
        expand(i, i);     // odd length palindromes
        expand(i, i+1);   // even length palindromes
    }

    return s.substr(start, maxLen);
}
// "babad" → "bab" or "aba"
// Time: O(n²), Space: O(1)

String Compression

string compress(string s) {
    string result = "";
    int i = 0;
    while (i < s.size()) {
        char c = s[i];
        int count = 0;
        while (i < s.size() && s[i] == c) { i++; count++; }
        result += c;
        if (count > 1) result += to_string(count);
    }
    return result.size() < s.size() ? result : s;
}
// "aabcccccaaa" → "a2bc5a3"
// "abc" → "abc" (not compressed — longer)

Practice Problems

Easy: 1. Count the number of vowels and consonants in a string. 2. Check if a string is a rotation of another (“waterbottle” is rotation of “erbottlewat”). 3. Find the first non-repeating character in a string.

Medium: 4. Given a string, find the length of the longest substring without repeating characters. 5. Given a string of brackets, check if they are balanced. 6. Find all permutations of a string.

Hard: 7. Find the minimum window substring containing all characters of a pattern. 8. Find the longest substring with at most k distinct characters.


Answers to Selected Problems

Problem 2 (Rotation check):

bool isRotation(string a, string b) {
    if (a.size() != b.size()) return false;
    return (a + a).find(b) != string::npos;
    // If b is a rotation of a, it appears in a+a
}

Problem 3 (First non-repeating):

char firstUnique(string s) {
    int freq[26] = {};
    for (char c : s) freq[c-'a']++;
    for (char c : s)
        if (freq[c-'a'] == 1) return c;
    return '\0';
}

Problem 4 (Longest substring without repeating):

int lengthOfLongestSubstring(string s) {
    unordered_map<char, int> lastSeen;
    int left = 0, maxLen = 0;
    for (int right = 0; right < s.size(); right++) {
        if (lastSeen.count(s[right]) && lastSeen[s[right]] >= left)
            left = lastSeen[s[right]] + 1;
        lastSeen[s[right]] = right;
        maxLen = max(maxLen, right - left + 1);
    }
    return maxLen;
}
// "abcabcbb" → 3 ("abc")
// Sliding window + hash map

Problem 7 (Minimum window substring):

string minWindow(string s, string t) {
    unordered_map<char, int> need, have;
    for (char c : t) need[c]++;
    int formed = 0, required = need.size();
    int left = 0, minLen = INT_MAX, start = 0;

    for (int right = 0; right < s.size(); right++) {
        char c = s[right];
        have[c]++;
        if (need.count(c) && have[c] == need[c]) formed++;

        while (formed == required) {
            if (right - left + 1 < minLen) {
                minLen = right - left + 1;
                start = left;
            }
            have[s[left]]--;
            if (need.count(s[left]) && have[s[left]] < need[s[left]])
                formed--;
            left++;
        }
    }
    return minLen == INT_MAX ? "" : s.substr(start, minLen);
}
// s="ADOBECODEBANC", t="ABC" → "BANC"

References

  • Cormen et al. — Introduction to Algorithms — Chapter 32 (String Matching)
  • Sedgewick & Wayne — Algorithms — Chapter 5 (Strings)
  • KMP Visualization — visualgo.net/en/suffixarray
  • LeetCode — String problems
start typing to search