Novice Python user here - I'm attempting to Calculate the Business Hours between two dates in a pandas DataFrame given 9am-5pm, Mon-Fri Working Hours and to exclude Australian Public Holidays.
I have tried to hack together a lot of solutions over the past few days and apply it to my problem but I'm having significant trouble.
I will post my current iteration but also looking for feedback as the best way to handle this overall and to gain some understanding of how to tackle these problems in the future.
My lastest attempt is using pandas CDay then creating a custom holiday calendar for Australian dates which all seems to be working - it's then going from this step to applying it to the pandas dates which I am having trouble understanding. I am using a custom function from this https://codereview.stackexchange.com/questions/135142/calculate-working-minutes-between-two-timestamps/135200#135200 solution to count the minutes between the dates but having no luck.
Appreciate any help!
import datetime
from pandas.tseries.holiday import Holiday, AbstractHolidayCalendar
from pandas.tseries.offsets import CDay
class HolidayCalendar(AbstractHolidayCalendar):
rules =[Holiday('New Years Day',year=2016,month=1,day=1),
Holiday('Australia Day',year=2016,month=1,day=26),
Holiday('Good Friday',year=2016,month=3,day=25),
Holiday('Easter Monday',year=2016,month=3,day=28),
Holiday('ANZAC Day',year=2016,month=4,day=25),
Holiday('Queens Birthday',year=2016,month=6,day=13),
Holiday('Christmas Day',year=2016,month=12,day=25),
Holiday('Boxing Day',year=2016,month=12,day=26),
Holiday('New Years Day',year=2017,month=1,day=1),
Holiday('Australia Day',year=2017,month=1,day=26),
Holiday('Good Friday',year=2017,month=4,day=15),
Holiday('Easter Monday',year=2017,month=4,day=17),
Holiday('ANZAC Day',year=2017,month=4,day=25),
Holiday('Queens Birthday',year=2017,month=6,day=12),
Holiday('Christmas Day',year=2017,month=12,day=25),
Holiday('Boxing Day',year=2017,month=12,day=26),
Holiday('New Years Day',year=2018,month=1,day=1),
Holiday('Australia Day',year=2018,month=1,day=26),
Holiday('Good Friday',year=2018,month=3,day=30),
Holiday('Easter Monday',year=2018,month=4,day=2),
Holiday('ANZAC Day',year=2018,month=4,day=25),
Holiday('Queens Birthday',year=2018,month=6,day=11),
Holiday('Christmas Day',year=2018,month=12,day=25),
Holiday('Boxing Day',year=2018,month=12,day=26)]
cal = HolidayCalendar()
dayindex = pd.bdate_range(datetime.date(2015,1,1),datetime.date.today(),freq=CDay(calendar=cal))
day_series = dayindex.to_series()
def count_mins(start,end):
starttime = datetime.datetime.fromtimestamp(int(start)/1000)
endtime = datetime.datetime.fromtimestamp(int(end)/1000)
days = day_series[starttime.date():endtime.date()]
daycount = len(days)
if daycount == 0:
return daycount
else:
startday = datetime.datetime(days[0].year,
days[0].month,
days[0].day,
hour=9,
minute=0)
endday = datetime.datetime(days[-1].year,
days[-1].month,
days[-1].day,
hour=17,
minute=0)
if daycount == 1:
if starttime < startday:
periodstart = startday
else:
periodstart = starttime
if endtime > endday:
periodend = endday
else:
periodend = endtime
return (periodend - periodstart).seconds/60
if daycount == 2:
if starttime < startday:
first_day_mins = 480
else:
first_day_mins = (startday.replace(hour=17)-starttime).seconds/60
if endtime > endday:
second_day_mins = 480
else:
second_day_mins = (endtime-endday.replace(hour=9)).seconds/60
return (first_day_mins + second_day_mins)
else:
if starttime < startday:
first_day_mins = 480
else:
first_day_mins = (startday.replace(hour=17)-starttime).seconds/60
if endtime > endday:
second_day_mins = 480
else:
second_day_mins = (endtime-endday.replace(hour=9)).seconds/60
return (first_day_mins + second_day_mins + ((daycount-2)*480))
df_updated['Created Date'] = pd.to_datetime(df_updated['Created Date'])
df_updated['Updated Date'] = pd.to_datetime(df_updated['Updated Date'])
df_updated['Created Date'] = df_updated['Created Date'].astype(np.int64) /
int(1e6)
df_updated['Updated Date'] = df_updated['Updated Date'].astype(np.int64) /
int(1e6)
count_mins(df_updated['Created Date'], df_updated['Updated Date'])
Try out this package called business-duration in PyPi
pip install business-duration
from business_duration import businessDuration
import pandas as pd
from datetime import time,datetime
import holidays as pyholidays
startdate = pd.to_datetime('2017-01-01 00:00:00')
enddate = pd.to_datetime('2017-01-31 23:00:00')
starttime=time(9,0,0)
endtime=time(17,0,0)
holidaylist = pyholidays.Australia()
unit='hour'
#By default weekends are Saturday and Sunday
print(businessDuration(startdate,enddate,starttime,endtime,holidaylist=holidayli
st,unit=unit))
Output: 160.0
holidaylist:
{datetime.date(2017, 1, 1): "New Year's Day",
datetime.date(2017, 1, 2): "New Year's Day (Observed)",
datetime.date(2017, 1, 26): 'Australia Day',
datetime.date(2017, 3, 6): 'Canberra Day',
datetime.date(2017, 4, 14): 'Good Friday',
datetime.date(2017, 4, 15): 'Easter Saturday',
datetime.date(2017, 4, 17): 'Easter Monday',
datetime.date(2017, 4, 25): 'Anzac Day',
datetime.date(2017, 6, 12): "Queen's Birthday",
datetime.date(2017, 9, 26): 'Family & Community Day',
datetime.date(2017, 10, 2): 'Labour Day',
datetime.date(2017, 12, 25): 'Christmas Day',
datetime.date(2017, 12, 26): 'Boxing Day'}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With