delivery_cost_expanded <- delivery_cost |># Split the volume range into an objectmutate(range_obj = purrr::map(`Vol Range`, str_split, " - ") ) |># Unnest the object for individual referenceunnest(range_obj) |>unnest_wider(range_obj, names_sep ="_") |># Handle the "1350+" scenariomutate(min_vol = purrr::map_chr(range_obj_1, str_replace, "\\+", ""), max_vol =ifelse(is.na(range_obj_2), (2^31) -1, range_obj_2) ) |># Turn volumes from charaters to integersmutate(across(min_vol:max_vol, as.integer) ) |># Drop irrelevant columnsselect(-c(range_obj_1, range_obj_2, `Vol Range`))
annual_cust_volume <-# Take transaction level data transactions |># Bring in the customer profile for the `Cold Drink Channel`inner_join( customer_profile, join_by(CUSTOMER_NUMBER) ) |># Get annual cases/gallons by customergroup_by(YEAR, CUSTOMER_NUMBER, COLD_DRINK_CHANNEL) |>summarise(annual_cases =sum(DELIVERED_CASES), annual_gallons =sum(DELIVERED_GALLONS), .groups ="drop" )
cust_addr_expanded <- customer_address |># Split the full address into an objectmutate(addr_obj = purrr::map(full.address, str_split, ",") ) |># Unnest the object for individual referenceunnest(addr_obj) |>unnest_wider(addr_obj, names_sep ="_") |># Pad the zip code with leading zeros and make a charactermutate(zip =str_pad(zip, 5, "left", pad ="0") ) |># Rename columnsrename(city = addr_obj_2, state = addr_obj_3, state_abbr = addr_obj_4, county = addr_obj_5, lat = addr_obj_7, lon = addr_obj_8 ) |># Turn lat/lon values to numericmutate(across(lat:lon, as.numeric) ) |># Drop irrelevant columnsselect(-c(full.address, addr_obj_1, addr_obj_6))
combined_data_raw <-# Take transactions transactions |># Join the customer profile data theretoinner_join( customer_profile |>mutate(zip =str_pad( ZIP_CODE, 5, "left", "0" )), join_by(CUSTOMER_NUMBER) ) |># Join the customer address data theretoinner_join( cust_addr_expanded, join_by(zip) ) |># Join the delivery cost tiers data theretoinner_join( delivery_cost_tiers, join_by(YEAR, CUSTOMER_NUMBER) )
combined_data_std <-# Take the combined data from above combined_data_raw |># Standardize the names janitor::clean_names() |># Standardize data typesmutate(# Convert charater dates to date typesacross(c(transaction_date, first_delivery_date, on_boarding_date), lubridate::mdy), # Turn IDs into charactersacross(c(customer_number, primary_group_number), as.character), # Turn finite categorical fields into factorsacross(c(order_type, cold_drink_channel, frequent_order_type, trade_channel, sub_trade_channel, state, state_abbr), as.factor ) ) |># Remove irrelevant columnsselect(-c(zip_code))
swire_data_full <- combined_data_std |># Add new fieldsmutate(# Calculate delivered gallons cost# Assume a return is only half as costly as a normal deliverydelivered_gallons_cost =case_when( delivered_gallons <0~-1* delivered_gallons * gallon_delivery_cost /2, TRUE~ delivered_gallons * gallon_delivery_cost ), # Calculate delivered case cost# Assume a return is only half as costly as a normal deliverydelivered_cases_cost =case_when( delivered_cases <0~-1* delivered_cases * case_delivery_cost /2, TRUE~ delivered_cases * case_delivery_cost ),# Create 'total' columns representing the sum of gallons & casesordered_total = ordered_gallons + ordered_cases, loaded_total = loaded_gallons + loaded_cases, delivered_total = delivered_gallons + delivered_cases, ) |>group_by(year, primary_group_number) |>mutate(# Calculate number of customers belonging to each primary group by yearprimary_group_customers =ifelse(is.na(primary_group_number), 0, n_distinct(customer_number)) ) |>group_by(year, customer_number) |>mutate(# Calculate how often a customer issues a return each yearreturn_frequency =sum(ifelse(delivered_cases <0| delivered_gallons <0, 1, 0)) ) |>ungroup() |># Drop select columns that are no longer relevantselect(-c(gallon_delivery_cost, case_delivery_cost)) |># Order the columns logicallyselect(# CUSTOMER PROFILE ITEMS customer_number, primary_group_number, primary_group_customers, on_boarding_date, first_delivery_date, cold_drink_channel, frequent_order_type, trade_channel, sub_trade_channel, local_market_partner, co2_customer, city, zip, state, state_abbr, county, lat, lon, # TRANSACTION DETAILS transaction_date, week, year, order_type, ordered_cases, loaded_cases, delivered_cases, delivered_cases_cost, ordered_gallons, loaded_gallons, delivered_gallons, delivered_gallons_cost, ordered_total, loaded_total, delivered_total, return_frequency )