from sklearn.datasets import make_blobs

import math
import scipy

import pandas as pd

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()

%matplotlib inline

plot_size   = 14
plot_width  = 5
plot_height = 5

params = {'legend.fontsize': 'large',
          'figure.figsize': (plot_width,plot_height),
          'axes.labelsize': plot_size,
          'axes.titlesize': plot_size,
          'xtick.labelsize': plot_size*0.75,
          'ytick.labelsize': plot_size*0.75,
          'axes.titlepad': 25}
plt.rcParams.update(params)


center_box = (100, 200) # defines the box that cluster centres are allowed to be in

n_customer = 200
n_facility = 5

facil_capacity  = 15000

custm_dem_low  = 30
custm_dem_high = 80

transp_unit_cost = 2
capacity_unit_cost = 100

np.random.seed(2)


custm_demands = np.random.randint(low=custm_dem_low, 
                                  high=custm_dem_high, 
                                  size=n_customer)


print(f'sum_facil ({n_facility* facil_capacity}) > sum_cust ({sum(custm_demands)}) = {n_facility* facil_capacity > sum(custm_demands)}')

sum_facil (75000) > sum_cust (11293) = True


cust_coord , _ = make_blobs(n_samples=n_customer, 
                            centers=2, 
                            cluster_std=20, 
                            center_box=center_box, 
                            random_state =  2)

facil_coord, _ = make_blobs(n_samples=n_facility, 
                            centers=1, 
                            cluster_std=30, 
                            center_box=center_box, 
                            random_state = 50)


custm_node_size = plot_size*10*((custm_demands/custm_dem_high)**2)
facil_node_size = plot_size*100

plt.scatter(cust_coord[:, 0], 
            cust_coord[:, 1],  
            s=custm_node_size, 
            cmap='viridis', 
            zorder=1500);

plt.scatter(facil_coord[:, 0], 
            facil_coord[:, 1], 
            s=facil_node_size, 
            cmap='viridis', 
            zorder=2000);

plt.axis('off')

(83.64235900634193, 232.54148477049307, 48.169009398292125, 214.30310861724314)


from scipy.spatial import distance

dist_matrix = distance.cdist(cust_coord, facil_coord, 'euclidean')
transp_cost_matrix = dist_matrix * transp_unit_cost


n_customer = len(cust_coord)
n_facility = len(facil_coord)


from pulp import *

prob = LpProblem('prob', LpMinimize)


S = [('S'+str(i+1)) for i in range(n_facility)]
D = [('C'+str(i+1)) for i in range(n_customer)]


t = {customer:{facility:(transp_cost_matrix[i][j]) for j,facility in enumerate(S)} for i,customer in enumerate(D)}
P = {facility:(facil_capacity) for j,facility in enumerate(S)}
c = {facility:(facil_capacity*capacity_unit_cost) for j,facility in enumerate(S)}
Q = {customer:(custm_demands[i]) for i,customer in enumerate(D)}


n_min = 5
n_max = 5


x = LpVariable.dicts('x', (D,S), lowBound = 0, cat = LpContinuous)
y = LpVariable.dicts('y', S, lowBound = 0, upBound = 1, cat = LpInteger)


prob += lpSum([lpSum([x[i][j]*t[i][j] for j in S]) for i in D]) + lpSum([c[j]*y[j] for j in S])


for i in D:
    prob += lpSum([x[i][j] for j in S]) == Q[i]

for j in S:
    prob += lpSum([x[i][j] for i in D]) <= P[j]*y[j]

prob += lpSum([y[j] for j in S]) >= n_min
prob += lpSum([y[j] for j in S]) <= n_max


status = prob.solve()

print(f'STATUS\n{LpStatus[status]}\n')

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /Users/pa01/opt/anaconda3/lib/python3.9/site-packages/pulp/solverdir/cbc/osx/64/cbc /var/folders/kk/14m2mg8s0w9c_tln749srnrw0000gp/T/2340fab067f84665b4f8716f83e07a12-pulp.mps timeMode elapsed branch printingOptions all solution /var/folders/kk/14m2mg8s0w9c_tln749srnrw0000gp/T/2340fab067f84665b4f8716f83e07a12-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 212 COLUMNS
At line 3243 RHS
At line 3451 BOUNDS
At line 3457 ENDATA
Problem MODEL has 207 rows, 1005 columns and 2015 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Continuous objective value is 8.01037e+06 - 0.00 seconds
Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements
Cbc3007W No integer variables - nothing to do
Cuts at root node changed objective from 8.01037e+06 to -1.79769e+308
Probing was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Gomory was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Knapsack was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
Clique was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
MixedIntegerRounding2 was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
FlowCover was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
TwoMirCuts was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)
ZeroHalf was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)

Result - Optimal solution found

Objective value:                8010365.00234909
Enumerated nodes:               0
Total iterations:               0
Time (CPU seconds):             0.00
Time (Wallclock seconds):       0.02

Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.01   (Wallclock seconds):       0.03

STATUS
Optimal


chosen_facil = []
for j,facility in enumerate(S):
    if y[facility].varValue == 1:
        chosen_facil.append(j)
        print(f'We will be establishing a warehouse in {facility}')

chosen_count = len(chosen_facil)

print(f'\nWe are establishing a total of {chosen_count} facilities')

We will be establishing a warehouse in S1
We will be establishing a warehouse in S2
We will be establishing a warehouse in S3
We will be establishing a warehouse in S4
We will be establishing a warehouse in S5

We are establishing a total of 5 facilities


vect_orig = []
vect_dest = []
u = v = np.zeros((len(vect_orig),len(vect_orig)))

cust_distance_dict = dict()

for j,facility in enumerate(S):
    for i,customer in enumerate(D):
        if x[customer][facility].varValue > 0:
            vect_orig.append(facil_coord[j])
            vect_dest.append(cust_coord[i])
            dist =  distance.euclidean(facil_coord[j],cust_coord[i])
            cust_distance_dict[customer] = round(dist,1)
            
vect_orig = np.array(vect_orig)
vect_dest = np.array(vect_dest)


plt.scatter(cust_coord[:, 0], 
            cust_coord[:, 1], 
            s=plot_size*2, 
            cmap='viridis',
            zorder = 10000);


plt.scatter(facil_coord[:, 0], 
            facil_coord[:, 1], 
            s=plot_size*5, 
            cmap='viridis');

for i in range(len(vect_orig)):
    plt.arrow(vect_orig[i][0],
              vect_orig[i][1],
              vect_dest[i][0]-vect_orig[i][0],
              vect_dest[i][1]-vect_orig[i][1],
              color='black')
    
plt.scatter(facil_coord[chosen_facil, 0], 
            facil_coord[chosen_facil, 1], 
            s=plot_size*10, 
            cmap='viridis',
            zorder=99999)

plt.axis('off')

(83.63856852546967, 232.5461688022704, 48.16625778748879, 214.30759451598573)


df_distances = pd.DataFrame(cust_distance_dict.items(), columns=['Customer', 'Distance'])


df_distances.head(5)


df_distances['Distance'].describe()

count    200.000000
mean      22.608000
std       10.418081
min        0.000000
25%       14.500000
50%       22.350000
75%       30.900000
max       56.100000
Name: Distance, dtype: float64


dist_min =  df_distances['Distance'].min()
dist_max =  df_distances['Distance'].max()
dist_avg =  df_distances['Distance'].mean()
dist_sum =  df_distances['Distance'].sum()


print(f'Analysis for { chosen_count } facilities: \n')

print(f'Total distance  : { dist_sum:.2f}')
print(f'Minimum distance: { dist_min:.2f}')
print(f'Maximum distance: { dist_max:.2f}')
print(f'Average distance: { dist_avg:.2f}')

Analysis for 5 facilities: 

Total distance  : 4521.60
Minimum distance: 0.00
Maximum distance: 56.10
Average distance: 22.61


df_distances.hist(column='Distance')

array([[<AxesSubplot:title={'center':'Distance'}>]], dtype=object)


hist_bins = [0, 25, 50, 75, 100]
df_distances.hist(column='Distance', bins = hist_bins)

array([[<AxesSubplot:title={'center':'Distance'}>]], dtype=object)


freq_counts = df_distances['Distance'].value_counts(bins=hist_bins,sort=False)

df_counts = freq_counts.to_frame().reset_index()

df_counts


def count_less(imax):
    return len(df_distances[df_distances['Distance']<imax])


df_cum = pd.DataFrame(range(10,110,10), columns=['Max'])
df_cum['Range'] = '0 - ' + df_cum['Max'].astype(str)
df_cum['Count'] =df_cum['Max'].apply(count_less)

df_cum['Percentage'] = round((df_cum['Count']/n_customer*100),1)
df_cum['Percentage'] = df_cum['Percentage'].astype(str) + '%'

df_cum

	Customer	Distance
0	C7	15.2
1	C42	11.3
2	C53	36.7
3	C58	27.2
4	C65	33.4

	index	Distance
0	(-0.001, 25.0]	115
1	(25.0, 50.0]	84
2	(50.0, 75.0]	1
3	(75.0, 100.0]	0

	Max	Range	Count	Percentage
0	10	0 - 10	25	12.5%
1	20	0 - 20	80	40.0%
2	30	0 - 30	144	72.0%
3	40	0 - 40	193	96.5%
4	50	0 - 50	199	99.5%
5	60	0 - 60	200	100.0%
6	70	0 - 70	200	100.0%
7	80	0 - 80	200	100.0%
8	90	0 - 90	200	100.0%
9	100	0 - 100	200	100.0%

Estimating Levels of Service¶