Too much regex?
Ned Batchelder
@nedbat
The goal
def substitute_variables(text, variables):
"""
Substitute ``${VAR}`` variables in `text`.
Variables in the text can take a number of
shell-inspired forms::
$VAR
${VAR}
${VAR?} strict: an error if no VAR.
${VAR-miss} defaulted: "miss" if no VAR.
$$ just a dollar sign.
`variables` is a dictionary of variable values.
Returns the resulting text with values substituted.
"""
In action
def test_substitute():
before = "Look: $FOO ${BAR-default} $$"
data = {'FOO': 'Xyzzy'}
after = substitute_variables(before, data)
assert after == "Look: Xyzzy default $"
The regex!
dollar_pattern = r"""(?x) # Verbose regex syntax
\$ # A dollar sign,
(?: # then
(?P<dollar> \$ ) | # a dollar sign, or
(?P<word1> \w+ ) | # a plain word, or
{ # a {-wrapped
(?P<word2> \w+ ) # word,
(?:
(?P<strict> \? ) | # strict or
-(?P<defval> [^}]* ) # defaulted
)? # maybe
}
)
"""
Using it
text = re.sub(dollar_pattern, dollar_replace, text)
return text
def dollar_replace(match):
"""Called for each $replacement."""
Replacement
def dollar_replace(match):
"""Called for each $replacement."""
# Get the one group that matched.
groups = match.group('dollar', 'word1', 'word2')
word = next(g for g in groups if g)
if word == "$":
return "$"
elif word in variables:
return variables[word]
elif match.group('strict'):
msg = "Variable {} is undefined: {!r}"
raise NameError(msg.format(word, text))
else:
return match.group('defval')
Side rant: any()
- These are similar:
x or y or z
any([x, y, z])
- Except!
x or y or z # returns the true thing
any([x, y, z]) # returns True or False
If only
groups = (None, "something", None)
word = any(groups) # DOESN'T WORK! ☹
word = next(g for g in groups if g) # UGLY, but works
Replacement
def dollar_replace(match):
"""Called for each $replacement."""
# Get the one group that matched.
groups = match.group('dollar', 'word1', 'word2')
word = next(g for g in groups if g)
if word == "$":
return "$"
elif word in variables:
return variables[word]
elif match.group('strict'):
msg = "Variable {} is undefined: {!r}"
raise NameError(msg.format(word, text))
else:
return match.group('defval')
def substitute_variables(text, variables):
"""
Substitute ``${VAR}`` variables in `text`.
Variables in the text can take a number of
shell-inspired forms::
$VAR
${VAR}
${VAR?} strict: an error if no VAR.
${VAR-miss} defaulted: "miss" if no VAR.
$$ just a dollar sign.
`variables` is a dictionary of variable values.
Returns the resulting text with values substituted.
"""
dollar_pattern = r"""(?x) # Verbose regex syntax
\$ # A dollar sign,
(?: # then
(?P<dollar> \$ ) | # a dollar sign, or
(?P<word1> \w+ ) | # a plain word, or
{ # a {-wrapped
(?P<word2> \w+ ) # word,
(?:
(?P<strict> \? ) | # strict or
-(?P<defval> [^}]* ) # defaulted
)? # maybe
}
)
"""
def dollar_replace(match):
"""Called for each $replacement."""
# Get the one group that matched.
groups = match.group('dollar', 'word1', 'word2')
word = next(g for g in groups if g)
if word == "$":
return "$"
elif word in variables:
return variables[word]
elif match.group('strict'):
msg = "Variable {} is undefined: {!r}"
raise NameError(msg.format(word, text))
else:
return match.group('defval')
text = re.sub(dollar_pattern, dollar_replace, text)
return text