Too much regex?

Ned Batchelder
@nedbat

The goal

    def substitute_variables(
        text: str, variables: dict[str, str],
    ) -> str:
        """
        Substitute ``${VAR}`` variables in `text`.

        Variables in the text can take a number of
        shell-inspired forms::

            $VAR            the value of VAR.
            ${VAR}          the same, with explicit braces.
            ${VAR?}         strict: an error if no VAR.
            ${VAR-miss}     defaulted: "miss" if no VAR.
            $$              just a dollar sign.

        `variables` is a dictionary of variable values.

        Returns the resulting text with values substituted.

        """
    

In action

    def test_substitute():
        """Plain, defaulted, and escaped-dollar forms are all substituted."""
        variables = {'FOO': 'Xyzzy'}
        source = "Look: $FOO ${BAR-default} $$"
        result = substitute_variables(source, variables)
        assert result == "Look: Xyzzy default $"
    

The regex!

    # Matches one "$" form per hit.  Exactly one of the named groups
    # `dollar`, `word1`, or `word2` is set on each match; `strict` and
    # `defval` can only be set together with `word2` (the braced form).
    dollar_pattern = r"""(?x)   # Verbose regex syntax
        \$                      # A dollar sign,
        (?:                     # then
            (?P<dollar> \$ ) |      # a dollar sign, or
            (?P<word1> \w+ ) |      # a plain word, or
            \{                      # a {-wrapped
                (?P<word2> \w+ )        # word,
                (?:
                    (?P<strict> \? ) |      # strict or
                    -(?P<defval> [^}]* )    # defaulted
                )?                      # maybe
            }
        )
        """
    

Using it

    # re.sub calls dollar_replace with the match object for every match
    # of dollar_pattern and uses its return value as the replacement.
    text = re.sub(dollar_pattern, dollar_replace, text)
    return text
    
    def dollar_replace(match: re.Match[str]) -> str:
        """Called for each $replacement; returns the text to substitute."""
    

Replacement

    def dollar_replace(match: re.Match[str]) -> str:
        """Called for each $replacement.

        Returns the text that replaces `match`.  `variables` and `text`
        are not parameters: they are free names taken from the
        surrounding scope.

        Raises NameError for a strict ``${VAR?}`` whose VAR is undefined.
        """
        # Get the one group that matched.
        groups = match.group('dollar', 'word1', 'word2')
        word = next(g for g in groups if g)

        if word == "$":
            return "$"
        elif word in variables:
            return variables[word]
        elif match["strict"]:
            msg = f"Variable {word} is undefined: {text!r}"
            raise NameError(msg)
        else:
            # `defval` is None when no "-default" was given ($VAR or
            # ${VAR}).  Return "" explicitly so the declared `-> str`
            # holds instead of handing None back to re.sub.
            return match["defval"] or ""
    

Side rant: any()

If only

    groups = (None, "something", None)
    
    word = any(groups)                      # DOESN'T WORK! ☹  any() gives True, not "something"
    
    word = next(g for g in groups if g)     # UGLY, but works: the first truthy item
    

Replacement

    def dollar_replace(match: re.Match[str]) -> str:
        """Called for each $replacement.

        Returns the text that replaces `match`.  `variables` and `text`
        are not parameters: they are free names taken from the
        surrounding scope.

        Raises NameError for a strict ``${VAR?}`` whose VAR is undefined.
        """
        # Get the one group that matched.
        groups = match.group('dollar', 'word1', 'word2')
        word = next(g for g in groups if g)

        if word == "$":
            return "$"
        elif word in variables:
            return variables[word]
        elif match["strict"]:
            msg = f"Variable {word} is undefined: {text!r}"
            raise NameError(msg)
        else:
            # `defval` is None when no "-default" was given ($VAR or
            # ${VAR}).  Return "" explicitly so the declared `-> str`
            # holds instead of handing None back to re.sub.
            return match["defval"] or ""
    
    def substitute_variables(
        text: str, variables: dict[str, str],
    ) -> str:
        """
        Substitute ``${VAR}`` variables in `text`.

        Variables in the text can take a number of
        shell-inspired forms::

            $VAR            the value of VAR, or "" if no VAR.
            ${VAR}          the same, with explicit braces.
            ${VAR?}         strict: an error if no VAR.
            ${VAR-miss}     defaulted: "miss" if no VAR.
            $$              just a dollar sign.

        `variables` is a dictionary of variable values.

        Returns the resulting text with values substituted.

        Raises NameError if a strict variable is not in `variables`.

        """
        dollar_pattern = r"""(?x)   # Verbose regex syntax
            \$                      # A dollar sign,
            (?:                     # then
                (?P<dollar> \$ ) |      # a dollar sign, or
                (?P<word1> \w+ ) |      # a plain word, or
                \{                      # a {-wrapped
                    (?P<word2> \w+ )        # word,
                    (?:
                        (?P<strict> \? ) |      # strict or
                        -(?P<defval> [^}]* )    # defaulted
                    )?                      # maybe
                }
            )
            """

        def dollar_replace(match: re.Match[str]) -> str:
            """Called for each $replacement; returns the text to substitute."""
            # Get the one group that matched.
            groups = match.group('dollar', 'word1', 'word2')
            word = next(g for g in groups if g)

            if word == "$":
                return "$"
            elif word in variables:
                return variables[word]
            elif match["strict"]:
                # `text` is still the caller's original string here: it is
                # only rebound after re.sub returns.
                msg = f"Variable {word} is undefined: {text!r}"
                raise NameError(msg)
            else:
                # `defval` is None when no "-default" was given ($VAR or
                # ${VAR}).  Return "" explicitly so the declared `-> str`
                # holds instead of handing None back to re.sub.
                return match["defval"] or ""

        text = re.sub(dollar_pattern, dollar_replace, text)
        return text
    

Too much?

Thanks

@nedbat