diff --git a/paper.pdf b/paper.pdf index 132288a..7d6ed23 100644 Binary files a/paper.pdf and b/paper.pdf differ diff --git a/paper.tex b/paper.tex index 0cded62..333f2f1 100644 --- a/paper.tex +++ b/paper.tex @@ -37,6 +37,9 @@ \input{tex/4_stu/6_fams} \input{tex/4_stu/7_pixels_intervals} \input{tex/5_con/1_intro} +\input{tex/5_con/2_generalizability} +\input{tex/5_con/3_implications} +\input{tex/5_con/4_further_research} \newpage \input{tex/glossary} diff --git a/tex/4_stu/4_overall.tex b/tex/4_stu/4_overall.tex index dff9559..4d11718 100644 --- a/tex/4_stu/4_overall.tex +++ b/tex/4_stu/4_overall.tex @@ -44,13 +44,6 @@ As the non-seasonal \textit{hses} reaches a similar accuracy as its So, in the absence of seasonality, models that only model a trend part are the least susceptible to the noise. -For medium demand (i.e., $10 < \text{ADD} < 25$) and training horizons up to - six weeks, the best-performing models are the same as for low demand. -For longer horizons, \textit{hets} provides the highest accuracy. -Thus, to fit a seasonal pattern, longer training horizons are needed. -While \textit{vsvr} enters the top three, \textit{hets} has the edge as they - neither require parameter tuning nor real-time data. - \begin{center} \captionof{table}{Top-3 models by training weeks and average demand ($1~\text{km}^2$ pixel size, 60-minute time steps)} @@ -206,6 +199,13 @@ While \textit{vsvr} enters the top three, \textit{hets} has the edge as they \end{tabular} \end{center} +For medium demand (i.e., $10 < \text{ADD} < 25$) and training horizons up to + six weeks, the best-performing models are the same as for low demand. +For longer horizons, \textit{hets} provides the highest accuracy. +Thus, to fit a seasonal pattern, longer training horizons are needed. +While \textit{vsvr} enters the top three, \textit{hets} has the edge as they + neither require parameter tuning nor real-time data. 
+ In summary, except for high demand, simple models trained on horizontal time series work best. By contrast, high demand (i.e., $25 < \text{ADD} < \infty$) and less than diff --git a/tex/4_stu/6_fams.tex b/tex/4_stu/6_fams.tex index 8fdd17d..c398824 100644 --- a/tex/4_stu/6_fams.tex +++ b/tex/4_stu/6_fams.tex @@ -1,19 +1,6 @@ \subsection{Results by Model Families} \label{fams} -Besides the overall results, we provide an in-depth comparison of models - within a family. -Instead of reporting the MASE per model, we rank the models holding the - training horizon fixed to make comparison easier. -Table \ref{t:hori} presents the models trained on horizontal time series. -In addition to \textit{naive}, we include \textit{fnaive} and \textit{pnaive} - already here as more competitive benchmarks. -The tables in this section report two rankings simultaneously: -The first number is the rank resulting from lumping the low and medium - clusters together, which yields almost the same rankings when analyzed - individually. -The ranks from only high demand pixels are in parentheses if they differ. - \begin{center} \captionof{table}{Ranking of benchmark and horizontal models ($1~\text{km}^2$ pixel size, 60-minute time steps): @@ -47,6 +34,19 @@ The ranks from only high demand pixels are in parentheses if they differ. \end{center} \ +Besides the overall results, we provide an in-depth comparison of models + within a family. +Instead of reporting the MASE per model, we rank the models holding the + training horizon fixed to make comparison easier. +Table \ref{t:hori} presents the models trained on horizontal time series. +In addition to \textit{naive}, we include \textit{fnaive} and \textit{pnaive} + already here as more competitive benchmarks. +The tables in this section report two rankings simultaneously: +The first number is the rank resulting from lumping the low and medium + clusters together, which yields almost the same rankings when analyzed + individually. 
+The ranks from only high demand pixels are in parentheses if they differ. + A first insight is that \textit{fnaive} is the best benchmark in all scenarios: Decomposing flexibly by tuning the $ns$ parameter is worth the computational diff --git a/tex/5_con/1_intro.tex b/tex/5_con/1_intro.tex index 42126bd..9188f96 100644 --- a/tex/5_con/1_intro.tex +++ b/tex/5_con/1_intro.tex @@ -1,2 +1,6 @@ \section{Conclusion} -\label{con} \ No newline at end of file +\label{con} + +We conclude this paper by elaborating on how the findings are transferable + to similar settings, providing some implications for a UDP's + managers, and discussing further research opportunities. diff --git a/tex/5_con/2_generalizability.tex b/tex/5_con/2_generalizability.tex new file mode 100644 index 0000000..5275fc0 --- /dev/null +++ b/tex/5_con/2_generalizability.tex @@ -0,0 +1,23 @@ +\subsection{Generalizability of the Methodology and Findings} +\label{generalizability} + +Whereas forecasting applications are always data-specific, the following + aspects generalize to UDPs with ad-hoc transportation services: +\begin{itemize} +\item \textbf{Double Seasonality}: +The double seasonality causes a periodicity $k$ too large to be modeled by + classical models, and we adapt the STL method in the \textit{fnaive} model + such that it ``flexibly'' fits a seasonal pattern changing in a non-trivial + way over time. +\item \textbf{Order Sparsity}: +The intermittent time series resulting from gridification require simple + methods like \textit{hsma} or \textit{trivial} that are not as susceptible + to noise as more sophisticated ones. +\item \textbf{Unified CV}: +A CV unified around a whole day allows evaluating classical statistical and ML + methods on the same scale. +It is agnostic of both the type of the time series and retraining. +\item \textbf{Error Measure}: +Analogous to \cite{hyndman2006}, we emphasize the importance of choosing a + consistent error measure, and argue for increased use of MASE. 
+\end{itemize} diff --git a/tex/5_con/3_implications.tex b/tex/5_con/3_implications.tex new file mode 100644 index 0000000..6fe91a4 --- /dev/null +++ b/tex/5_con/3_implications.tex @@ -0,0 +1,58 @@ +\subsection{Managerial Implications} +\label{implications} + +Even though the zeitgeist claims that having more data is always better, our study + shows this is not the case here: +First, under certain circumstances, accuracy may go up with shorter training + horizons. +Second, none of the external data sources improves the accuracies. +Somewhat surprisingly, despite ML-based methods' popularity in both business + and academia in recent years, we must conclude that classical forecasting + methods suffice to reach the best accuracy in our study. +There is one case where ML-based methods are competitive in our case study: + In a high demand pixel, if only about four to six weeks of past data is + available, the \textit{vrfr} model outperformed the classical ones. +So, we recommend trying out ML-based methods in such scenarios. +In addition, with the \textit{hsma} and \textit{hets} models being the overall + winners, incorporating real-time data is not beneficial, in particular, + with more than six weeks of training material available. +Lastly, with just \textit{hets}, which exhibits an accuracy comparable to + \textit{hsma} for low and medium demand, our industry partner can likely + schedule its shifts on an hourly basis one week in advance. + +This study gives rise to the following managerial implications. +First, UDPs can implement readily available forecasting algorithms with limited + effort. +This, however, requires purposeful data collection and preparation by those + companies, which, according to our study, is at least as important as + the selection of the forecasting algorithm, as becomes clear from + investigating the impact of the length of the training horizon. 
+Second, the benefits of moving from manual forecasting to automated forecasting + include being able to pursue a predictive routing strategy and + demand-adjusted shift scheduling. +At the time the case study data was collected, our industry partner did not + conduct any forecasting; the only forecasting-related activities were the + shift managers scheduling the shifts one week in advance manually in + spreadsheets. +Thus, selecting the right forecasting algorithm according to the framework + proposed in this study becomes a prerequisite to the much-needed + operational improvements UDPs need to achieve in their quest for + profitability. +In general, many UDPs launched in recent years are venture-capital-backed + start-up companies that almost by definition do not have a strong + grounding in operational excellence, and publications such as the ones by + Uber are the exception rather than the rule. +Our paper shows that forecasting the next couple of hours can already be + implemented within the first year of a UDP's operations. +Even if such forecasts could not be exploited by predictive routing (e.g., due + to prolonged waiting times at restaurants), they would help monitor the + operations for exceptional events. +Additionally, the shift planning may be automated, saving as much as one shift + manager per city. +We emphasize that for the most part, our proposed forecasting system + is calibrated automatically and no manual work by a data scientist is required. +The only two parameters where assumptions need to be made are the pixel size + and the time step. +While they can only be optimized by the data scientist over time, the results in our + empirical study suggest that a pixel size of $1~\text{km}^2$ and a time step of + one hour are ideal. 
\ No newline at end of file diff --git a/tex/5_con/4_further_research.tex b/tex/5_con/4_further_research.tex new file mode 100644 index 0000000..be2a006 --- /dev/null +++ b/tex/5_con/4_further_research.tex @@ -0,0 +1,45 @@ +\subsection{Further Research} +\label{further_research} + +Sub-sections \ref{overall_results} and \ref{fams} present the models' average + performance. +We did not research what is the best model in a given pixel on a given day. +To answer this, a study finding an optimal number of outer validation days is + necessary. +With the varying effect of the training horizon, this model selection is a + two-dimensional grid search that is prone to overfitting due to the high + noise in low count data. +Except for heuristics relating the ADD to the training horizon, we cannot say + anything about that based on our study. +\cite{lemke2010} and \cite{wang2009} show how, for example, a time series' + characteristics may be used to select models. +Thus, we suggest conducting more detailed analyses on how to incorporate model + selection into our proposed forecasting system. + +Future research should also integrate our forecasting system into a predictive + routing application and evaluate its business impact. +This embeds our research into the vast literature on the VRP. +Initially introduced by \cite{dantzig1959}, \gls{vrp}s are concerned with + finding optimal routes serving customers. +We refer to \cite{toth2014} for a comprehensive overview. +The two variants relevant for the UDP case are the dynamic VRP and + the pickup and delivery problem (\gls{pdp}). +A VRP is dynamic if the data to solve a problem only becomes available + as the operations are underway. +\cite{thomas2010}, \cite{pillac2013}, and \cite{psaraftis2016} describe how + technological advances, in particular, mobile technologies, have led to a + renewed interest in research on dynamic VRPs, and + \cite{berbeglia2010} provide a general overview. 
+\cite{ichoua2006} and \cite{ferrucci2013} provide solution methods for + simulation studies where they assume stochastic customer demand based on + historical distributions. +In both studies, dummy demand nodes are inserted into the VRP instance. +Forecasts by our system extend this idea naturally as dummy nodes could be + derived from point forecasts as well. +The concrete case of a meal-delivering UDP is contained in a recent + literature stream started by \cite{ulmer2017} and extended by + \cite{reyes2018} and \cite{yildiz2018}: They coin the term meal delivery + routing problem (\gls{mdrp}). +The MDRP is a special case of the dynamic PDP where the defining + characteristic is that once a vehicle is scheduled, a modification of the + route is inadmissible. diff --git a/tex/glossary.tex b/tex/glossary.tex index ea86d45..63cb402 100644 --- a/tex/glossary.tex +++ b/tex/glossary.tex @@ -11,9 +11,15 @@ \newglossaryentry{mase}{ name=MASE, description={Mean Absolute Scaled Error} } +\newglossaryentry{mdrp}{ + name=MDRP, description={Meal Delivery Routing Problem} +} \newglossaryentry{ml}{ name=ML, description={Machine Learning} } +\newglossaryentry{pdp}{ + name=PDP, description={Pickup and Delivery Problem} +} \newglossaryentry{rf}{ name=RF, description={Random Forest} } diff --git a/tex/references.bib b/tex/references.bib index 05fd42b..8926f3f 100644 --- a/tex/references.bib +++ b/tex/references.bib @@ -44,6 +44,18 @@ howpublished = {\url{https://eng.uber.com/forecasting-introduction/}}, note = {Accessed: 2020-10-01} } +@article{berbeglia2010, +title={Dynamic Pickup and Delivery Problems}, +author={Berbeglia, Gerardo and Cordeau, Jean-Fran{\c{c}}ois + and Laporte, Gilbert}, +year={2010}, +journal={European Journal of Operational Research}, +volume={202}, +number={1}, +pages={8--15}, +publisher={Elsevier} +} + @article{box1962, title={Some statistical Aspects of adaptive Optimization and Control}, author={Box, George and Jenkins, Gwilym}, @@ -167,6 +179,17 @@ 
year={2016}, publisher={Springer} } +@article{dantzig1959, +title={The truck dispatching problem}, +author={Dantzig, George and Ramser, John}, +year={1959}, +journal={Management science}, +volume={6}, +number={1}, +pages={80--91}, +publisher={Informs} +} + @article{de2006, title={25 Years of Time Series Forecasting}, author={De Gooijer, Jan and Hyndman, Rob}, @@ -196,6 +219,18 @@ volume={116}, pages={242--265} } +@article{ferrucci2013, +title={A pro-active real-time Control Approach for Dynamic Vehicle Routing + Problems dealing with the Delivery of urgent Goods}, +author={Ferrucci, Francesco and Bock, Stefan and Gendreau, Michel}, +year={2013}, +journal={European Journal of Operational Research}, +volume={225}, +number={1}, +pages={130--141}, +publisher={Elsevier} +} + @article{gardner1985, title={Forecasting Trends in Time Series}, author={Gardner, Everette and McKenzie, Ed}, @@ -329,6 +364,18 @@ year={2018}, publisher={OTexts} } +@article{ichoua2006, +title={Exploiting Knowledge about Future Demands for Real-time Vehicle + Dispatching}, +author={Ichoua, Soumia and Gendreau, Michel and Potvin, Jean-Yves}, +year={2006}, +journal={Transportation Science}, +volume={40}, +number={2}, +pages={211--225}, +publisher={INFORMS} +} + @article{kim2016, title={A new Metric of Absolute Percentage Error for Intermittent Demand Forecasts}, @@ -362,6 +409,17 @@ howpublished = {\url{https://eng.uber.com/neural-networks/}}, note = {Accessed: 2020-10-01} } +@article{lemke2010, +title={Meta-Learning for Time Series Forecasting and Forecast Combination}, +author={Lemke, Christiane and Gabrys, Bogdan}, +year={2010}, +journal={Neurocomputing}, +volume={73}, +number={10-12}, +pages={2006--2016}, +publisher={Elsevier} +} + @article{ma2018, title={Using the Gradient Boosting Decision Tree to Improve the Delineation of Hourly Rain Areas during the Summer from Advanced Himawari Imager Data}, @@ -430,6 +488,18 @@ number={5}, pages={311--315} } +@article{pillac2013, +title={A Review of 
Dynamic Vehicle Routing Problems}, +author={Pillac, Victor and Gendreau, Michel and Gu{\'e}ret, Christelle + and Medaglia, Andr{\'e}s L}, +year={2013}, +journal={European Journal of Operational Research}, +volume={225}, +number={1}, +pages={1--11}, +publisher={Elsevier} +} + @article{prestwich2014, title={Mean-based Error Measures for Intermittent Demand Forecasting}, author={Prestwich, Steven and Rossi, Roberto and Tarim, Armagan @@ -442,6 +512,25 @@ pages={6782--6791}, publisher={Taylor \& Francis} } +@article{psaraftis2016, +title={Dynamic Vehicle Routing Problems: Three Decades and Counting}, +author={Psaraftis, Harilaos and Wen, Min and Kontovas, Christos}, +year={2016}, +journal={Networks}, +volume={67}, +number={1}, +pages={3--31}, +publisher={Wiley Online Library} +} + +@article{reyes2018, +title={The Meal Delivery Routing Problem}, +author={Reyes, Damian and Erera, Alan and Savelsbergh, Martin + and Sahasrabudhe, Sagar and O'Neil, Ryan}, + year={2018}, +journal={Optimization Online} +} + @incollection{scholkopf1998, title={Fast Approximation of Support Vector Kernel Expansions, and an Interpretation of Clustering as Approximation in Feature Spaces}, @@ -501,6 +590,30 @@ number={4}, pages={715--725} } +@article{thomas2010, +title={Dynamic vehicle routing}, +author={Thomas, Barrett W}, +year={2010}, +journal={Wiley Encyclopedia of Operations Research and Management Science}, +publisher={Wiley Online Library} +} + +@book{toth2014, +title={Vehicle Routing: Problems, Methods, and Applications}, +author={Toth, Paolo and Vigo, Daniele}, +year={2014}, +publisher={SIAM} +} + +@techreport{ulmer2017, +title={The Restaurant Meal Delivery Problem: Dynamic Pick-up and Delivery with + Deadlines and Random Ready Times}, +author={Ulmer, Marlin and Thomas, Barrett and Campbell, Ann Melissa + and Woyak, Nicholas}, +year={2017}, +institution={Technical Report} +} + @article{vapnik1963, title={Pattern Recognition using Generalized Portrait Method}, author={Vapnik, Vladimir and 
Lerner, A}, @@ -525,6 +638,18 @@ year={2013}, publisher={Springer} } +@article{wang2009, +title={Rule Induction for Forecasting Method Selection: + Meta-learning the Characteristics of Univariate Time Series}, +author={Wang, Xiaozhe and Smith-Miles, Kate and Hyndman, Rob}, +year={2009}, +journal={Neurocomputing}, +volume={72}, +number={10-12}, +pages={2581--2594}, +publisher={Elsevier} +} + @article{wang2018, title={Delivering meals for multiple suppliers: Exclusive or sharing logistics service}, @@ -555,4 +680,11 @@ journal={Management Science}, volume={6}, number={3}, pages={324--342} +} + +@article{yildiz2018, +title={Provably High-Quality Solutions for the Meal Delivery Routing Problem}, +author={Yildiz, Baris and Savelsbergh, Martin}, + year={2018}, +journal={Optimization Online} } \ No newline at end of file