I'm writing a class RecurringInterval
which - based on the dateutil.rrule object - represents a recurring interval in time. I have defined a custom, human-readable __str__
method for it and would like to also define a parse
method which (similar to the rrulestr() function) parses the string back into an object.
Here is the parse
method and some test cases to go with it:
import re
from dateutil.rrule import FREQNAMES
import pytest
class RecurringInterval(object):
freq_fmt = "{freq}"
start_fmt = "from {start}"
end_fmt = "till {end}"
byweekday_fmt = "by weekday {byweekday}"
bymonth_fmt = "by month {bymonth}"
@classmethod
def match_pattern(cls, string):
SPACES = r'\s*'
freq_names = [freq.lower() for freq in FREQNAMES] + [freq.title() for freq in FREQNAMES] # The frequencies may be either lowercase or start with a capital letter
FREQ_PATTERN = '(?P<freq>{})?'.format("|".join(freq_names))
# Start and end are required (their regular expressions match 1 repetition)
START_PATTERN = cls.start_fmt.format(start=SPACES + r'(?P<start>.+?)')
END_PATTERN = cls.end_fmt.format(end=SPACES + r'(?P<end>.+?)')
# The remaining tokens are optional (their regular expressions match 0 or 1 repetitions)
BYWEEKDAY_PATTERN = cls.optional(cls.byweekday_fmt.format(byweekday=SPACES + r'(?P<byweekday>.+?)'))
BYMONTH_PATTERN = cls.optional(cls.bymonth_fmt.format(bymonth=SPACES + r'(?P<bymonth>.+?)'))
PATTERN = SPACES + FREQ_PATTERN \
+ SPACES + START_PATTERN \
+ SPACES + END_PATTERN \
+ SPACES + BYWEEKDAY_PATTERN \
+ SPACES + BYMONTH_PATTERN \
+ SPACES + "$" # The character '$' is needed to make the non-greedy regular expressions parse till the end of the string
return re.match(PATTERN, string).groupdict()
@staticmethod
def optional(pattern):
'''Encloses the given regular expression in an optional group (i.e., one that matches 0 or 1 repetitions of the original regular expression).'''
return '({})?'.format(pattern)
'''Tests'''
def test_match_pattern_with_byweekday_and_bymonth():
string = "Weekly from 2017-11-03 15:00:00 till 2017-11-03 16:00:00 by weekday Monday, Tuesday by month January, February"
groups = RecurringInterval.match_pattern(string)
assert groups['freq'] == "Weekly"
assert groups['start'].strip() == "2017-11-03 15:00:00"
assert groups['end'].strip() == "2017-11-03 16:00:00"
assert groups['byweekday'].strip() == "Monday, Tuesday"
assert groups['bymonth'].strip() == "January, February"
def test_match_pattern_with_bymonth_and_byweekday():
string = "Weekly from 2017-11-03 15:00:00 till 2017-11-03 16:00:00 by month January, February by weekday Monday, Tuesday "
groups = RecurringInterval.match_pattern(string)
assert groups['freq'] == "Weekly"
assert groups['start'].strip() == "2017-11-03 15:00:00"
assert groups['end'].strip() == "2017-11-03 16:00:00"
assert groups['byweekday'].strip() == "Monday, Tuesday"
assert groups['bymonth'].strip() == "January, February"
if __name__ == "__main__":
# pytest.main([__file__])
pytest.main([__file__+"::test_match_pattern_with_byweekday_and_bymonth"]) # This passes
# pytest.main([__file__+"::test_match_pattern_with_bymonth_and_byweekday"]) # This fails
Although the parser works if you specify the arguments in the 'right' order, it is 'inflexible' in that it doesn't allow the optional arguments to be given in arbitrary order. This is why the second test fails.
What would be a way to make the parser parse the 'optional' fields in any order, such that both tests pass? (I was thinking of making an iterator with all permutations of the regular expressions and trying re.match
on each one, but this does not seem like an elegant solution).
At this point, your language is getting complex enough that it's time to ditch regular expressions and learn how to use a proper parsing library. I threw this together using pyparsing, and I've annotated it heavily to try and explain what's going on, but if anything's unclear do ask and I'll try to explain.
from pyparsing import Regex, oneOf, OneOrMore
# Boring old constants, I'm sure you know how to fill these out...
months = ['January', 'February']
weekdays = ['Monday', 'Tuesday']
frequencies = ['Daily', 'Weekly']
# A datetime expression is anything matching this regex. We could split it down
# even further to get day, month, year attributes in our results object if we felt
# like it
datetime_expr = Regex(r'(\d{4})-(\d\d?)-(\d\d?) (\d{2}):(\d{2}):(\d{2})')
# A from or till expression is the word "from" or "till" followed by any valid datetime
from_expr = 'from' + datetime_expr.setResultsName('from_')
till_expr = 'till' + datetime_expr.setResultsName('till')
# A range expression is a from expression followed by a till expression
range_expr = from_expr + till_expr
# A weekday is any old weekday
weekday_expr = oneOf(weekdays)
month_expr = oneOf(months)
frequency_expr = oneOf(frequencies)
# A by weekday expression is the words "by weekday" followed by one or more weekdays
by_weekday_expr = 'by weekday' + OneOrMore(weekday_expr).setResultsName('weekdays')
by_month_expr = 'by month' + OneOrMore(month_expr).setResultsName('months')
# A recurring interval, then, is a frequency, followed by a range, followed by
# a weekday and a month, in any order
recurring_interval = frequency_expr + range_expr + (by_weekday_expr & by_month_expr)
# Let's parse!
if __name__ == '__main__':
res = recurring_interval.parseString('Daily from 1111-11-11 11:00:00 till 1111-11-11 12:00:00 by weekday Monday by month January February')
# Note that setResultsName causes everything to get packed neatly into
# attributes for us, so we can pluck all the bits and pieces out with no
# difficulty at all
print res
print res.from_
print res.till
print res.weekdays
print res.months
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With