← Back to Blog
·11 min read

10 Real-World Regex Problems and How Professionals Solve Them

Regex tutorials teach you character classes and quantifiers. They rarely show you the messy, ambiguous problems you actually face in production code. Here are ten real scenarios — with complete solutions and explanations of the tradeoffs.

1. Redacting secrets from logs

/**
 * Redact credit card numbers (common formats).
 *
 * Replaces every run of 13–16 digits, optionally separated by single spaces
 * or hyphens ("4111 1111 1111 1111", "4111-1111-1111-1111"), with
 * `[CC_REDACTED]`. No checksum validation is done, so any digit run of that
 * length is redacted.
 *
 * @param log - Raw log text.
 * @returns The log text with card-like digit runs replaced.
 */
const redactCC = (log: string) =>
  // A separator is only accepted *between* two digits, so the match never
  // swallows the space or hyphen that follows the number.
  log.replace(/\d(?:[ -]?\d){12,15}/g, '[CC_REDACTED]');

/**
 * Redact API keys and tokens.
 *
 * Looks for a label (`api_key`, `api-key`, `apikey`, `token`, `secret`,
 * `password`; case-insensitive) followed by `:` or `=` and a value of at
 * least 20 word characters or hyphens, optionally wrapped in quotes. The
 * label and separator are kept; only the value becomes `[REDACTED]`.
 * Values shorter than 20 characters are left untouched.
 *
 * @param log - Raw log text.
 * @returns The log text with long secret values replaced.
 */
const redactKeys = (log: string) =>
  log.replace(/((?:api[_-]?key|token|secret|password)\s*[:=]\s*)["']?[\w-]{20,}["']?/gi,
    '$1[REDACTED]');

/**
 * Redact email addresses.
 *
 * Matches `local@domain.tld` where each part is a run of characters that are
 * neither whitespace nor `@`, and the domain contains at least one dot.
 * Punctuation glued to the address (e.g. a trailing comma) is part of the
 * match because only whitespace and `@` end a part.
 *
 * @param log - Raw log text.
 * @returns The log text with addresses replaced by `[EMAIL_REDACTED]`.
 */
const redactEmail = (log: string) =>
  log.replace(/[^\s@]+@[^\s@]+\.[^\s@]+/g, '[EMAIL_REDACTED]');

/**
 * Full sanitizing pipeline: feeds the log text through each redactor in
 * turn — card numbers first, then keys/tokens, then email addresses — with
 * every step receiving the previous step's output.
 *
 * @param log - Raw log text.
 * @returns The log text after all three redactors have run.
 */
const sanitizeLog = (log: string) => {
  let cleaned = log;
  for (const redact of [redactCC, redactKeys, redactEmail]) {
    cleaned = redact(cleaned);
  }
  return cleaned;
};

2. Parsing CSV with quoted fields containing commas

// Naive CSV split breaks on: "Smith, John",30,"New York, NY"
// .split(',') → ['"Smith', ' John"', '30', '"New York', ' NY"']

// Correct approach using regex:
/**
 * Splits one CSV record into its fields.
 *
 * A field is either a double-quoted string (which may contain commas and
 * doubled `""` quotes) or a run of non-comma characters. Quoted fields are
 * returned without their surrounding quotes and with `""` collapsed to `"`.
 *
 * @param line - A single CSV line, without the trailing newline.
 * @returns The fields in order; an empty line yields one empty field.
 */
function parseCSVLine(line: string): string[] {
  // Group 1 is the raw field, group 2 is the separator: "," or "" at end of line.
  const fieldPattern = /("(?:[^"]|"")*"|[^,]*)(,|$)/g;
  const out: string[] = [];

  for (const [, raw, separator] of line.matchAll(fieldPattern)) {
    const quoted = raw.startsWith('"');
    // Strip the wrapping quotes and unescape doubled quotes.
    out.push(quoted ? raw.slice(1, -1).replace(/""/g, '"') : raw);

    // An empty separator means the field ended at end of line: stop here.
    if (separator === '') break;
  }

  return out;
}
// For production: use a battle-tested CSV library (papaparse, csv-parse)

3. Extracting URLs from raw text

// Extract URLs from plain text (not HTML).
// Shape: http(s) scheme, host made of word characters, dots, hyphens or %XX
// escapes, an optional ":port", then an optional tail that starts at "/",
// "?" or "#" and runs up to the next whitespace.
const URL_PATTERN = /https?:\/\/(?:[-\w.]|%[\da-fA-F]{2})+(?::\d+)?(?:[/?#]\S*)?/g;

/**
 * Returns every http/https URL found in `text`, in order of appearance.
 *
 * The tail of a URL ends only at whitespace, so punctuation that directly
 * follows a URL (a closing parenthesis, a trailing period) is included in
 * the returned string.
 *
 * @param text - Plain text to scan.
 * @returns The matched URLs; an empty array when there are none.
 */
function extractUrls(text: string): string[] {
  return Array.from(text.matchAll(URL_PATTERN), (hit) => hit[0]);
}

// Tricky edge cases this handles:
// - URLs with query strings and fragments
// - Encoded characters (%20, etc.)
// - Sub-paths and file extensions

// What it does NOT handle perfectly:
// - Bare domains without http:// (twitter.com, not https://twitter.com)
// - URLs in parentheses where the ) is part of the URL
// - Those require a more complex parser or manual review

4. Finding TODO/FIXME comments in source code

// In a grep/ripgrep context:
// rg "TODO|FIXME|HACK|XXX|BUG" --glob "*.ts"

// In JavaScript (parsing source files):
// Matches a comment opener (`//`, `#` or `/*`) followed by a tag.
// Capture groups: [1] tag (case-insensitive), [2] optional owner written in
// parentheses right after the tag, [3] the rest of the line as the message.
// For `/* ... */` comments the closing `*/` on the same line is part of [3].
const TODO_PATTERN = /(?:\/\/|#|\/\*)\s*(TODO|FIXME|HACK|XXX|BUG)(?:\(([^)]+)\))?:?\s*(.+)/gi;

// Convert each regex match on `source` into a structured record.
const todos = Array.from(source.matchAll(TODO_PATTERN), ([, tag, who, text]) => ({
  type: tag.toUpperCase(),   // tags are matched case-insensitively; normalize them
  owner: who || null,        // "TODO(alex): ..." captures "alex"; null when absent
  message: text.trim(),
}));

// Output: [{type: "TODO", owner: "alex", message: "refactor this"}, ...]

5. Normalizing inconsistent date formats

// Normalize dates from various formats to YYYY-MM-DD.
// Input variations: "12/25/2026", "25-12-2026", "December 25, 2026", "2026.12.25"

/** Lower-cased full English month names mapped to two-digit month numbers. */
const MONTH_MAP: Record<string, string> = {
  january: '01', february: '02', march: '03', april: '04',
  may: '05', june: '06', july: '07', august: '08',
  september: '09', october: '10', november: '11', december: '12',
};

/**
 * Converts a date string in one of the supported layouts to `YYYY-MM-DD`.
 *
 * Supported layouts:
 * - `MM/DD/YYYY` (slashes are read month-first)
 * - `DD-MM-YYYY` (hyphens are read day-first)
 * - `YYYY.MM.DD`
 * - `<full month name> D[,] YYYY`, month name case-insensitive
 * - `YYYY-MM-DD`, returned unchanged
 *
 * Only the layout is checked; month/day ranges are not validated.
 *
 * @param input - The date string; it must match a layout exactly (no
 *   surrounding whitespace).
 * @returns The normalized date, or `null` when no layout matches.
 */
function normalizeDate(input: string): string | null {
  const pad = (part: string) => part.padStart(2, '0');

  // Numeric day/month order is ambiguous, so the separator decides:
  // "/" means MM/DD/YYYY and "-" means DD-MM-YYYY.
  const slashMatch = input.match(/^(\d{1,2})\/(\d{1,2})\/(\d{4})$/);
  if (slashMatch) {
    const [, month, day, year] = slashMatch;
    return `${year}-${pad(month)}-${pad(day)}`;
  }

  const dashMatch = input.match(/^(\d{1,2})-(\d{1,2})-(\d{4})$/);
  if (dashMatch) {
    const [, day, month, year] = dashMatch;
    return `${year}-${pad(month)}-${pad(day)}`;
  }

  // Year-first with dots: "2026.12.25"
  const dotMatch = input.match(/^(\d{4})\.(\d{1,2})\.(\d{1,2})$/);
  if (dotMatch) {
    const [, year, month, day] = dotMatch;
    return `${year}-${pad(month)}-${pad(day)}`;
  }

  // Month name: "December 25, 2026"
  const nameMatch = input.match(/^(\w+)\s+(\d{1,2}),?\s+(\d{4})$/i);
  if (nameMatch) {
    const [, name, day, year] = nameMatch;
    const key = name.toLowerCase();
    // Own-property check: a plain lookup would also find inherited keys such
    // as "constructor" and treat them as a month.
    if (Object.prototype.hasOwnProperty.call(MONTH_MAP, key)) {
      return `${year}-${MONTH_MAP[key]}-${pad(day)}`;
    }
  }

  // Already ISO: return as-is
  if (/^\d{4}-\d{2}-\d{2}$/.test(input)) return input;

  return null;
}

6. Replacing template variables in strings

/**
 * Replace {{variable}} placeholders with values from an object.
 *
 * A placeholder is `{{name}}` where `name` is one or more word characters
 * (letters, digits, underscore) with no inner whitespace. Placeholders whose
 * name is not an own key of `vars` are replaced by `{{MISSING:name}}` so
 * they stay visible in the output.
 *
 * @param template - Text containing `{{name}}` placeholders.
 * @param vars - Values keyed by placeholder name.
 * @returns The template with every placeholder substituted.
 */
function renderTemplate(template: string, vars: Record<string, string>): string {
  return template.replace(/\{\{(\w+)\}\}/g, (_placeholder, key: string) =>
    // Own-property check: `key in vars` would also accept inherited names
    // such as "toString" and substitute a function's source text.
    Object.prototype.hasOwnProperty.call(vars, key)
      ? vars[key]
      : `{{MISSING:${key}}}`,
  );
}

renderTemplate("Hello, {{name}}! Your order {{orderId}} is ready.", {
  name: "Alex",
  orderId: "ORD-12345"
});
// "Hello, Alex! Your order ORD-12345 is ready."

7. Validating semantic version strings

// Full semver validation per semver.org spec:
// MAJOR.MINOR.PATCH with no leading zeros, an optional "-prerelease" made of
// dot-separated identifiers (numeric ones without leading zeros), and an
// optional "+buildmeta" made of dot-separated alphanumeric/hyphen identifiers.
const SEMVER = /^(?<major>0|[1-9]\d*)\.(?<minor>0|[1-9]\d*)\.(?<patch>0|[1-9]\d*)(?:-(?<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?<buildmeta>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$/;

/**
 * Parses a semantic version string into its components.
 *
 * @param version - The version string; it must match the whole pattern
 *   (no "v" prefix, no surrounding whitespace).
 * @returns The numeric major/minor/patch plus the prerelease and build
 *   metadata strings (`null` when absent), or `null` if `version` is not
 *   valid semver.
 */
function parseSemver(version: string): {
  major: number;
  minor: number;
  patch: number;
  prerelease: string | null;
  buildmeta: string | null;
} | null {
  const groups = SEMVER.exec(version)?.groups;
  if (!groups) return null;

  return {
    // The pattern guarantees these are plain decimal digit strings.
    major: Number(groups.major),
    minor: Number(groups.minor),
    patch: Number(groups.patch),
    prerelease: groups.prerelease ?? null,
    buildmeta: groups.buildmeta ?? null,
  };
}
// "1.2.3-alpha.1+build.123" → { major:1, minor:2, patch:3, prerelease:"alpha.1", buildmeta:"build.123" }

8. Splitting on whitespace while respecting quoted strings

// Split shell-like argument strings:
// 'git commit -m "my message here"' → ["git", "commit", "-m", "my message here"]

/**
 * Splits a command line into arguments on whitespace, keeping quoted
 * sections together.
 *
 * An argument is a run of adjacent segments, each being a double-quoted
 * string, a single-quoted string, or bare non-whitespace text. The quotes
 * themselves are removed, so `--author="Alex"` becomes `--author=Alex`.
 * A quote character with no matching partner is kept as literal text.
 * Backslash escapes are not interpreted.
 *
 * @param input - The raw argument string.
 * @returns The arguments in order; an empty array for blank input.
 */
function shellSplit(input: string): string[] {
  // One whole argument: any mix of quoted strings, bare characters, and — as
  // a last resort — a stray unmatched quote character.
  const argPattern = /(?:"[^"]*"|'[^']*'|[^\s"']|["'])+/g;
  // The pieces inside an argument; a quoted piece captures its inner text.
  const segmentPattern = /"([^"]*)"|'([^']*)'|([^\s"']+)/g;

  const args: string[] = [];
  for (const [arg] of input.matchAll(argPattern)) {
    args.push(
      arg.replace(
        segmentPattern,
        (_segment, dq?: string, sq?: string, bare?: string) => dq ?? sq ?? bare ?? '',
      ),
    );
  }
  return args;
}

shellSplit('git commit -m "my message here" --author="Alex"');
// ["git", "commit", "-m", "my message here", "--author=Alex"]

9. Detecting and extracting import statements

// Extract all imports from TypeScript/JavaScript source.
// Matches statements that start at the beginning of a line. Capture groups:
// [1] default binding, [2] text between the braces of a named list,
// [3] namespace binding of `* as name`, [4] module specifier.
// Exactly one clause form is recognized per statement, so a combined
// `import Default, { named } from '...'` is not matched.
const IMPORT_RE = /^import\s+(?:type\s+)?(?:(?:(\w+)|(?:\{([^}]+)\})|(?:\*\s+as\s+(\w+)))\s+from\s+)?['"]([^'"]+)['"]/gm;

/**
 * Lists the import statements found in a source file.
 *
 * @param source - Full text of a TypeScript/JavaScript module.
 * @returns One record per matched import, in source order:
 *   - `default`: the default binding, or `null`
 *   - `named`: entries of a `{ ... }` list, trimmed (an `a as b` entry is
 *     kept as written); empty when there is no named list
 *   - `namespace`: the `* as name` binding, or `null`
 *   - `from`: the module specifier
 *   Side-effect imports (`import 'x'`) yield a record with only `from` set.
 */
function extractImports(source: string) {
  return [...source.matchAll(IMPORT_RE)].map(m => ({
    default: m[1] ?? null,
    named: m[2]
      ? m[2]
          .split(',')
          .map(s => s.trim())
          // A trailing comma in the list leaves an empty entry; drop it.
          .filter(s => s !== '')
      : [],
    namespace: m[3] ?? null,
    from: m[4],
  }));
}
// Works on: import React from 'react'
// import { useState, useEffect } from 'react'
// import * as fs from 'fs'
// import type { Metadata } from 'next'

10. Counting words while excluding code blocks

/**
 * Word count for markdown, excluding fenced code blocks.
 *
 * Strips fenced code blocks, inline code spans and http/https URLs, then
 * counts runs of word characters (ASCII letters, digits, underscore). As a
 * result "don't" and "well-known" each count as two words.
 *
 * @param markdown - Markdown source text.
 * @returns The number of words outside code and URLs.
 */
function wordCount(markdown: string): number {
  const prose = markdown
    // Fenced code blocks (``` ... ```), matched lazily so each fence pair is
    // removed on its own.
    .replace(/```[\s\S]*?```/g, '')
    // Inline code spans
    .replace(/`[^`]+`/g, '')
    // URLs (don't count as words)
    .replace(/https?:\/\/\S+/g, '');

  // Count words: sequences of word characters
  return (prose.match(/\w+/g) ?? []).length;
}

Published June 8, 2026 · By the utili.dev Team