10 Real-World Regex Problems and How Professionals Solve Them

1. Redacting secrets from logs

// Redact credit card numbers: 13-16 digits, optionally separated by a single
// space or hyphen (e.g. "4111 1111 1111 1111", "4111-1111-1111-1111").
// Separators are only matched *between* digits, so whatever follows the number
// (a space, a semicolon, ...) is left intact.
// NOTE: this is a shape-only match with no Luhn check, so any 13-16 digit run
// is redacted. For log scrubbing, over-redaction is the safer failure mode.
const redactCC = (log: string) =>
  log.replace(/\d(?:[ -]?\d){12,15}/g, '[CC_REDACTED]');

// Redact API keys and tokens: a value of 20 or more word characters / hyphens
// (hex, base64url, UUID-like) that follows `api_key`, `token`, `secret` or
// `password` and a `:` or `=`. Optional surrounding quotes are consumed too.
// The label and separator are kept ($1) so the log line stays readable.
// Values shorter than 20 characters are deliberately left alone to avoid
// redacting ordinary words.
const redactKeys = (log: string) =>
  log.replace(
    /((?:api[_-]?key|token|secret|password)\s*[:=]\s*)["']?[\w-]{20,}["']?/gi,
    '$1[REDACTED]',
  );

// Redact email addresses: local part, "@", then a domain containing at least
// one dot. Neither side may contain whitespace or a second "@".
// This is a deliberately loose shape match, not RFC 5322 validation.
const redactEmail = (log: string) =>
  log.replace(/[^\s@]+@[^\s@]+\.[^\s@]+/g, '[EMAIL_REDACTED]');

// Compose into a pipeline: each redactor receives the previous one's output.
// Redactors run in the listed order (cards, then keys, then emails).
const sanitizeLog = (log: string) => {
  let sanitized = log;
  for (const redact of [redactCC, redactKeys, redactEmail]) {
    sanitized = redact(sanitized);
  }
  return sanitized;
};

2. Parsing CSV with quoted fields containing commas

// Naive CSV split breaks on: "Smith, John",30,"New York, NY"
// .split(',') → ["\"Smith", " John\"", "30", "\"New York", " NY\""]

// Correct approach using regex:
// Split one CSV line into fields.
// - A quoted field ("...") may contain commas; a doubled quote ("") inside it
//   stands for one literal quote.
// - An unquoted field runs up to the next comma.
// - Malformed input (e.g. an unterminated quote) is returned verbatim rather
//   than being "unquoted" into something the line never contained.
// Always returns at least one field: an empty line yields [''].
// Does not handle fields that span multiple lines.
function parseCSVLine(line: string): string[] {
  const fields: string[] = [];
  // Group 1: contents of a properly quoted field (undefined otherwise).
  // Group 2: raw unquoted field. Group 3: the delimiter, '' at end of line.
  const re = /(?:"((?:[^"]|"")*)"|([^,]*))(,|$)/g;
  for (const match of line.matchAll(re)) {
    const quoted: string | undefined = match[1];
    fields.push(
      quoted !== undefined
        ? quoted.replace(/""/g, '"') // unescape doubled quotes
        : match[2] ?? '',
    );
    if (match[3] === '') break; // end of line
  }
  return fields;
}
// For production: use a battle-tested CSV library (papaparse, csv-parse)

3. Extracting URLs from raw text

// Extract URLs from plain text (not HTML).
// Shape: http(s) scheme, host (word characters, dots, hyphens, %XX escapes),
// an optional :port, then an optional tail that starts at "/", "?" or "#" and
// runs to the next whitespace.
const URL_PATTERN =
  /https?:\/\/(?:[-\w.]|%[\da-fA-F]{2})+(?::\d+)?(?:[\/?#][^\s]*)?/g;

// Returns every URL found in `text`, in order of appearance ([] when none).
// matchAll works on a copy of the regex, so the shared global pattern carries
// no lastIndex state between calls.
function extractUrls(text: string): string[] {
  return Array.from(text.matchAll(URL_PATTERN), (m) => m[0]);
}

// Tricky edge cases this handles:
// - URLs with query strings and fragments
// - Encoded characters (%20, etc.)
// - Sub-paths and file extensions

// What it does NOT handle perfectly:
// - Bare domains without http:// (twitter.com, not https://twitter.com)
// - URLs in parentheses where the ) is part of the URL
// - Those require a more complex parser or manual review

4. Finding TODO/FIXME comments in source code

// In a grep/ripgrep context:
// rg "TODO|FIXME|HACK|XXX|BUG" --glob "*.ts"

// In JavaScript (parsing source files):
// Matches a comment opener (//, # or /*), a tag, an optional "(owner)", an
// optional colon, and the rest of the line as the message.
// Groups: 1 = tag, 2 = owner (undefined when absent), 3 = message.
const TODO_PATTERN =
  /(?:\/\/|#|\/\*)\s*(TODO|FIXME|HACK|XXX|BUG)(?:\(([^)]+)\))?:?\s*(.+)/gi;

// One { type, owner, message } record per TODO_PATTERN hit in `source`.
const todos = Array.from(source.matchAll(TODO_PATTERN), (hit) => {
  const [, tag, who, rest] = hit;
  return {
    type: tag.toUpperCase(),
    owner: who || null, // "TODO(alex): ..." captures "alex"
    message: rest.trim(),
  };
});

// Output: [{type: "TODO", owner: "alex", message: "refactor this"}, ...]

5. Normalizing inconsistent date formats

// Normalize dates from various formats to YYYY-MM-DD:
// Input variations: "12/25/2026", "25-12-2026", "December 25, 2026", "2026.12.25"

// Lower-cased English month name → two-digit month number.
const MONTH_MAP: Record<string, string> = {
  january: '01', february: '02', march: '03', april: '04',
  may: '05', june: '06', july: '07', august: '08',
  september: '09', october: '10', november: '11', december: '12',
};

// Helper: assemble YYYY-MM-DD from digit strings, zero-padding month and day.
// Returns null when the month or day cannot exist on the calendar
// (month 13, February 30, ...), so garbage is never returned as a "date".
function toIsoDate(year: string, month: string, day: string): string | null {
  const y = Number(year);
  const m = Number(month);
  const d = Number(day);
  if (m < 1 || m > 12 || d < 1) return null;
  const isLeap = (y % 4 === 0 && y % 100 !== 0) || y % 400 === 0;
  const daysInMonth = [31, isLeap ? 29 : 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31];
  if (d > daysInMonth[m - 1]) return null;
  return `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`;
}

// Normalize a date string to YYYY-MM-DD.
// Accepted inputs:
//   "12/25/2026"        MM/DD/YYYY  (slash  → month first)
//   "25-12-2026"        DD-MM-YYYY  (hyphen → day first)
//   "2026.12.25"        YYYY.MM.DD
//   "December 25, 2026" full English month name, comma optional
//   "2026-12-25"        already ISO
// Returns null for anything else, including dates that cannot exist.
function normalizeDate(input: string): string | null {
  // Numeric day/month orders are ambiguous; the separator decides.
  const slash = input.match(/^(\d{1,2})\/(\d{1,2})\/(\d{4})$/);
  if (slash) return toIsoDate(slash[3], slash[1], slash[2]);

  const dashed = input.match(/^(\d{1,2})-(\d{1,2})-(\d{4})$/);
  if (dashed) return toIsoDate(dashed[3], dashed[2], dashed[1]);

  const dotted = input.match(/^(\d{4})\.(\d{1,2})\.(\d{1,2})$/);
  if (dotted) return toIsoDate(dotted[1], dotted[2], dotted[3]);

  // Month name: "December 25, 2026"
  const named = input.match(/^(\w+)\s+(\d{1,2}),?\s+(\d{4})$/);
  if (named) {
    const key = named[1].toLowerCase();
    // Own-property check: a plain lookup would also find inherited members
    // such as "constructor" and treat them as a month.
    if (Object.prototype.hasOwnProperty.call(MONTH_MAP, key)) {
      return toIsoDate(named[3], MONTH_MAP[key], named[2]);
    }
    return null;
  }

  // Already ISO: validated, then returned unchanged.
  const iso = input.match(/^(\d{4})-(\d{2})-(\d{2})$/);
  if (iso) return toIsoDate(iso[1], iso[2], iso[3]);

  return null;
}

6. Replacing template variables in strings

// Replace {{variable}} placeholders with values from an object.
// A placeholder whose name is not an own key of `vars` is rendered as a
// visible {{MISSING:name}} marker instead of being dropped silently.
function renderTemplate(template: string, vars: Record<string, string>): string {
  return template.replace(/\{\{(\w+)\}\}/g, (_match, key: string) =>
    // Own-property check: `key in vars` would also accept inherited names such
    // as "constructor" or "toString" and splice a function into the output.
    Object.prototype.hasOwnProperty.call(vars, key)
      ? vars[key]
      : `{{MISSING:${key}}}`,
  );
}

renderTemplate("Hello, {{name}}! Your order {{orderId}} is ready.", {
  name: "Alex",
  orderId: "ORD-12345"
});
// "Hello, Alex! Your order ORD-12345 is ready."

7. Validating semantic version strings

// Full semver validation per semver.org spec:
// MAJOR.MINOR.PATCH with no leading zeros, an optional "-prerelease" made of
// dot-separated identifiers, and optional "+build" metadata.
const SEMVER =
  /^(?<major>0|[1-9]\d*)\.(?<minor>0|[1-9]\d*)\.(?<patch>0|[1-9]\d*)(?:-(?<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?<buildmeta>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$/;

// Parse a version string into its components.
// Returns null when `version` is not valid semver; `prerelease` and
// `buildmeta` are null when that part is absent.
function parseSemver(version: string) {
  const groups = SEMVER.exec(version)?.groups;
  if (!groups) return null;
  return {
    major: Number.parseInt(groups.major, 10),
    minor: Number.parseInt(groups.minor, 10),
    patch: Number.parseInt(groups.patch, 10),
    prerelease: groups.prerelease ?? null,
    buildmeta: groups.buildmeta ?? null,
  };
}
// "1.2.3-alpha.1+build.123" → { major:1, minor:2, patch:3, prerelease:"alpha.1", ... }

8. Splitting on whitespace while respecting quoted strings

// Split shell-like argument strings:
// 'git commit -m "my message here"' → ["git", "commit", "-m", "my message here"]

// Split a shell-like argument string on whitespace, respecting quotes.
// - Text inside "..." or '...' may contain whitespace; the quotes are removed.
// - Quoted and unquoted segments that touch form ONE argument, so
//   --author="Alex" becomes --author=Alex.
// - A quote with no closing partner is kept as a literal character.
// Backslash escapes and variable expansion are not interpreted.
function shellSplit(input: string): string[] {
  // One argument = a run of balanced quoted segments, plain characters, or
  // (as a last resort) a stray quote character.
  const argument = /(?:"[^"]*"|'[^']*'|[^\s"']|["'])+/g;
  const quotedSegment = /"([^"]*)"|'([^']*)'/g;
  return Array.from(input.matchAll(argument), ([raw]) =>
    raw.replace(quotedSegment, (_m, dq?: string, sq?: string) => dq ?? sq ?? ''),
  );
}

shellSplit('git commit -m "my message here" --author="Alex"');
// ["git", "commit", "-m", "my message here", "--author=Alex"]

9. Detecting and extracting import statements

// Extract all imports from TypeScript/JavaScript source.
// Matches `import` at the start of a line, an optional `type` keyword, an
// optional binding clause followed by `from`, and the quoted module path.
// Capture groups:
//   1  default binding            import React ...
//   2  named list after default   import React, { memo } ...
//   3  namespace after default    import React, * as All ...
//   4  named list on its own      import { useState } ...
//   5  namespace on its own       import * as fs ...
//   6  module specifier
// With no binding clause (side-effect import) only group 6 is set.
const IMPORT_RE =
  /^import\s+(?:type\s+)?(?:(?:(\w+)(?:\s*,\s*(?:\{([^}]*)\}|\*\s+as\s+(\w+)))?|\{([^}]*)\}|\*\s+as\s+(\w+))\s+from\s+)?['"]([^'"]+)['"]/gm;

// Returns one record per import statement, in source order.
// `named` holds the raw specifiers as written ("a", "b as c", "type T");
// empty entries produced by a trailing comma are dropped.
function extractImports(source: string) {
  return Array.from(source.matchAll(IMPORT_RE), (m) => {
    const specifiers = m[2] ?? m[4];
    return {
      default: m[1] ?? null,
      named: specifiers
        ? specifiers
            .split(',')
            .map((s) => s.trim())
            .filter((s) => s !== '')
        : [],
      namespace: m[3] ?? m[5] ?? null,
      from: m[6],
    };
  });
}
// Works on: import React from 'react'
// import { useState, useEffect } from 'react'
// import * as fs from 'fs'
// import type { Metadata } from 'next'

10. Counting words while excluding code blocks

// Word count for markdown, excluding fenced code blocks, inline code and URLs.
// Removed spans are replaced by a space so the words on either side are not
// glued together into one.
function wordCount(markdown: string): number {
  const prose = markdown
    .replace(/```[\s\S]*?```/g, ' ') // fenced code blocks (``` ... ```)
    .replace(/`[^`]+`/g, ' ') // inline code
    .replace(/https?:\/\/\S+/g, ' '); // URLs don't count as words
  // A word is a run of Unicode letters, marks, digits or "_", optionally
  // joined by apostrophes, so "naïve" and "don't" each count once.
  const words = prose.match(/[\p{L}\p{M}\p{N}_]+(?:['’][\p{L}\p{M}\p{N}_]+)*/gu);
  return words?.length ?? 0;
}

Test these patterns in your browser

Regex Tester — test patterns with live highlighting →