Merge branch 'model-section' into develop
commit 20abf8eade
26 changed files with 1354 additions and 6 deletions
BIN paper.pdf (binary file not shown)
14 paper.tex
@@ -19,6 +19,16 @@
 \input{tex/2_lit/3_ml/4_rf}
 \input{tex/2_lit/3_ml/5_svm}
 \input{tex/3_mod/1_intro}
+\input{tex/3_mod/2_overall}
+\input{tex/3_mod/3_grid}
+\input{tex/3_mod/4_cv}
+\input{tex/3_mod/5_mase}
+\input{tex/3_mod/6_decomp}
+\input{tex/3_mod/7_models/1_intro}
+\input{tex/3_mod/7_models/2_hori}
+\input{tex/3_mod/7_models/3_vert}
+\input{tex/3_mod/7_models/4_rt}
+\input{tex/3_mod/7_models/5_ml}
 \input{tex/4_stu/1_intro}
 \input{tex/5_con/1_intro}

@@ -29,6 +39,10 @@
 \appendix
 \newpage
+\input{tex/apx/tabular_ml_models}
+\newpage
+\input{tex/apx/enhanced_feats}
+\newpage

 \bibliographystyle{static/elsarticle-harv}
 \bibliography{tex/references}
BIN static/cross_validation_gray.png (new binary file, 4.5 KiB, not shown)
BIN static/gridification_for_paris_gray.png (new binary file, 261 KiB, not shown)
BIN static/model_inputs_gray.png (new binary file, 84 KiB, not shown)
77 static/slashbox.sty (new file)
@@ -0,0 +1,77 @@
+% slashbox.sty by Koichi Yasuoka, May 27, 1993
+% minor modification by Toru Sato, May 31, 1993
+\typeout{slashbox style by K.Yasuoka, May 1993.}%
+\newbox\@slashboxa
+\newbox\@slashboxb
+\newbox\@slashboxc
+\newcount\@slashboxwd
+\newcount\@slashboxht
+\newdimen\@slashsepl
+\newdimen\@slashsepr
+\def\slashbox{%
+  \def\@slashboxpicture##1{%
+    \put(0,0){\line(##1,1){\@slashboxwd}}%
+    \put(0,\@slashboxht){\makebox(0,0)[tl]{\box\@slashboxa}}%
+    \put(\@slashboxwd,0){\makebox(0,0)[br]{\box\@slashboxb}}%
+  }%
+  \@slashbox
+}%
+\def\backslashbox{%
+  \def\@slashboxpicture##1{%
+    \put(0,\@slashboxht){\line(##1,-1){\@slashboxwd}}%
+    \put(0,0){\makebox(0,0)[bl]{\box\@slashboxa}}%
+    \put(\@slashboxwd,\@slashboxht){\makebox(0,0)[tr]{\box\@slashboxb}}%
+  }%
+  \@slashbox
+}%
+\def\@slashbox{\@ifnextchar [{\@@slashbox}{\@@slashbox[0pt]}}
+\def\@@slashbox[#1]{\@ifnextchar [{\@@@slashbox[#1]}{\@@@slashbox[#1][c]}}
+\def\@@@slashbox[#1][#2]#3#4{%
+  % #1: width, #2: suppression of \tabcolsep on `l', `r', or `lr' side
+  % #3: left item, #4: right item
+  \@slashsepl=\tabcolsep
+  \@slashsepr=\tabcolsep
+  \@tfor\@tempa :=#2\do{\expandafter\let
+    \csname @slashsep\@tempa\endcsname=\z@}%
+  \setbox\@slashboxa=\hbox{\strut\hskip\tabcolsep\shortstack[l]{#3}}%
+  \setbox\@slashboxb=\hbox{\shortstack[r]{#4}\hskip\tabcolsep\strut}%
+  \setbox\@slashboxa=\hbox{\raise\dp\@slashboxa\box\@slashboxa}%
+  \setbox\@slashboxb=\hbox{\raise\dp\@slashboxb\box\@slashboxb}%
+  \setbox\@slashboxc=\hbox{%
+    \@tempdima=\wd\@slashboxa
+    \advance\@tempdima by \wd\@slashboxb
+    \advance\@tempdima by \@slashsepl
+    \advance\@tempdima by \@slashsepr
+    \@tempdimb=#1\relax%
+    \ifdim\@tempdimb>\@tempdima \@tempdima=\@tempdimb\fi%
+    \@tempdimb=\ht\@slashboxa
+    \advance\@tempdimb by \dp\@slashboxa
+    \advance\@tempdimb by \ht\@slashboxb
+    \advance\@tempdimb by \dp\@slashboxb
+    \@tempcnta=\@tempdima
+    \@tempcntb=\@tempdimb
+    \advance\@tempcnta by \@tempcntb
+    \advance\@tempcnta by -1
+    \divide\@tempcnta by \@tempcntb
+    \ifnum\@tempcnta>6 \@tempcnta=6
+      \@tempdimb=0.166666666\@tempdima
+    \else
+      \ifnum\@tempcnta<1 \@tempcnta=1\fi
+      \@tempdima=\@tempdimb
+      \multiply\@tempdima by \@tempcnta
+    \fi%
+    \advance\@tempdima by -\@slashsepl
+    \advance\@tempdima by -\@slashsepr
+    \@slashboxwd=\@tempdima
+    \@slashboxht=\@tempdimb
+    \@tempcntb=\@slashsepl
+    \setlength{\unitlength}{1sp}%
+    \begin{picture}(\@slashboxwd,\@slashboxht)(\@tempcntb,0)
+      \advance\@tempdima by \@slashsepl
+      \advance\@tempdima by \@slashsepr
+      \@slashboxwd=\@tempdima
+      \@slashboxpicture{\@tempcnta}
+    \end{picture}%
+  }%
+  $\vcenter{\box\@slashboxc}$%
+}%
BIN static/stl_gray.png (new binary file, 104 KiB, not shown)
tex/3_mod/1_intro.tex
@@ -1,8 +1,6 @@
 \section{Model Formulation}
 \label{mod}

-% temporary placeholders
-\label{decomp}
-\label{f:stl}
-\label{mase}
-\label{unified_cv}
+In this section, we describe how the platform's raw data are pre-processed
+into model inputs and how the forecasting models are built and benchmarked
+against each other.
28 tex/3_mod/2_overall.tex (new file)
@@ -0,0 +1,28 @@
+\subsection{Overall Approach}
+\label{approach_approach}
+
+On a conceptual level, there are three distinct aspects of the model
+development process.
+First, a pre-processing step transforms the platform's tabular order data
+into either time series (Sub-section \ref{grid}) or feature matrices
+(Sub-section \ref{ml_models}).
+Second, a benchmark methodology is developed in Sub-section \ref{unified_cv}
+that compares all models on the same scale, in particular, classical models
+with ML ones.
+Concretely, the CV approach is adapted to the peculiar requirements of
+sub-daily and ordinal time series data so as to maximize the predictive
+power of all models into the future.
+Third, the forecasting models are described with respect to their
+assumptions and training requirements.
+Four classification dimensions are introduced:
+\begin{enumerate}
+\item \textbf{Timeliness of the Information}:
+    whole-day-ahead vs. real-time forecasts
+\item \textbf{Time Series Decomposition}: raw vs. decomposed
+\item \textbf{Algorithm Type}: "classical" statistics vs. ML
+\item \textbf{Data Sources}: pure vs. enhanced (i.e., with external data)
+\end{enumerate}
+Not all of the possible combinations are implemented; instead, the models
+are varied along these dimensions to show different effects and answer the
+research questions.
95 tex/3_mod/3_grid.tex (new file)
@@ -0,0 +1,95 @@
+\subsection{Gridification, Time Tables, and Time Series Generation}
+\label{grid}
+
+The platform's tabular order data are sliced with respect to both location
+and time and then aggregated into time series in which each observation
+gives the number of orders in an area for one time step/interval.
+Figure \ref{f:grid} shows how the orders' delivery locations are each
+matched to a square-shaped cell, referred to as a pixel, on a grid covering
+the entire service area within a city.
+This gridification step is also applied to the pickup locations separately.
+The lower-left corner of the grid is chosen at random.
+\cite{winkenbach2015} apply the same gridification idea and slice an urban
+area to model a location-routing problem, and \cite{singleton2017} portray
+it as a standard method in the field of urban analytics.
+With increasing pixel sizes, the time series exhibit more order aggregation
+and thus a possibly stronger demand pattern.
+On the other hand, the larger the pixels, the less valuable the generated
+forecasts become as, for example, a courier sent to a pixel preemptively
+then faces a longer average distance to a restaurant in the pixel.
+
+\begin{center}
+\captionof{figure}{Gridification for delivery locations in Paris with a
+    pixel size of $1~\text{km}^2$}
+\label{f:grid}
+\includegraphics[width=.8\linewidth]{static/gridification_for_paris_gray.png}
+\end{center}
+
+After gridification, the ad-hoc orders within a pixel are aggregated by
+their placement timestamps into sub-daily time steps of pre-defined lengths
+to obtain a time table as exemplified in Figure \ref{f:timetable} with
+one-hour intervals.
+
+\begin{center}
+\captionof{figure}{Aggregation into a time table with hourly time steps}
+\label{f:timetable}
+\begin{tabular}{|c||*{9}{c|}}
+\hline
+\backslashbox{Time}{Day} & \makebox[2em]{\ldots}
+    & \makebox[3em]{Mon} & \makebox[3em]{Tue}
+    & \makebox[3em]{Wed} & \makebox[3em]{Thu}
+    & \makebox[3em]{Fri} & \makebox[3em]{Sat}
+    & \makebox[3em]{Sun} & \makebox[2em]{\ldots} \\
+\hline
+\hline
+11:00 & \ldots & $y_{11,Mon}$ & $y_{11,Tue}$ & $y_{11,Wed}$ & $y_{11,Thu}$
+    & $y_{11,Fri}$ & $y_{11,Sat}$ & $y_{11,Sun}$ & \ldots \\
+\hline
+12:00 & \ldots & $y_{12,Mon}$ & $y_{12,Tue}$ & $y_{12,Wed}$ & $y_{12,Thu}$
+    & $y_{12,Fri}$ & $y_{12,Sat}$ & $y_{12,Sun}$ & \ldots \\
+\hline
+\ldots & \ldots & \ldots & \ldots & \ldots
+    & \ldots & \ldots & \ldots & \ldots & \ldots \\
+\hline
+20:00 & \ldots & $y_{20,Mon}$ & $y_{20,Tue}$ & $y_{20,Wed}$ & $y_{20,Thu}$
+    & $y_{20,Fri}$ & $y_{20,Sat}$ & $y_{20,Sun}$ & \ldots \\
+\hline
+21:00 & \ldots & $y_{21,Mon}$ & $y_{21,Tue}$ & $y_{21,Wed}$ & $y_{21,Thu}$
+    & $y_{21,Fri}$ & $y_{21,Sat}$ & $y_{21,Sun}$ & \ldots \\
+\hline
+\ldots & \ldots & \ldots & \ldots & \ldots
+    & \ldots & \ldots & \ldots & \ldots & \ldots \\
+\hline
+\end{tabular}
+\end{center}
+\
+
+Consequently, each $y_{t,d}$ in Figure \ref{f:timetable} is the number of
+all orders within the pixel for the time of day $t$ and day of week $d$
+($y_t$ and $y_{t,d}$ are the same but differ in that the latter
+acknowledges a 2D view).
+The same trade-off as with gridification applies:
+The shorter the interval, the weaker the demand pattern to be expected in
+the time series due to less aggregation, while longer intervals lead to
+less usable forecasts.
+We refer to time steps by their start time, and their number per day, $H$,
+is constant.
+Given a time table as in Figure \ref{f:timetable}, there are two ways to
+generate a time series by slicing:
+\begin{enumerate}
+\item \textbf{Horizontal View}:
+    Take only the order counts for a given time of the day
+\item \textbf{Vertical View}:
+    Take all order counts and remove the double-seasonal pattern induced
+    by the weekday and time of the day with decomposition
+\end{enumerate}
+Distinct time series are retrieved by iterating through the time tables
+either horizontally or vertically in increments of a single time step.
+Another property of a generated time series is its length, which, following
+the next sub-section, can be interpreted as the sum of the production
+training set and the test day.
+In summary, a distinct time series is generated from the tabular order data
+based on a configuration of parameters for the dimensions pixel size,
+number of daily time steps $H$, shape (horizontal vs. vertical), length,
+and the time step to be predicted.
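[Editor's note] The gridification and time-table steps are easy to sketch in code. The following Python sketch is illustrative only, not the paper's implementation; the pandas library, the column names x_m, y_m, and placed_at, and the metric coordinates are assumptions (real latitude-longitude pairs would first be projected to meters).

import pandas as pd

def gridify(orders: pd.DataFrame, pixel_size_m: float, x0: float, y0: float) -> pd.DataFrame:
    # Assign each order to a square pixel; (x0, y0) is the randomly chosen
    # lower-left corner of the grid; coordinates are assumed to be in meters.
    orders = orders.copy()
    orders["pixel_x"] = ((orders["x_m"] - x0) // pixel_size_m).astype(int)
    orders["pixel_y"] = ((orders["y_m"] - y0) // pixel_size_m).astype(int)
    return orders

def time_table(orders: pd.DataFrame, freq: str = "60min") -> pd.DataFrame:
    # Count orders per pixel and sub-daily time step; steps without any
    # order are absent here and would be filled with 0 when re-indexing.
    return (
        orders
        .groupby(["pixel_x", "pixel_y", pd.Grouper(key="placed_at", freq=freq)])
        .size()
        .rename("n_orders")
        .reset_index()
    )

Horizontal and vertical slices are then just row-wise and column-wise traversals of the resulting per-pixel table.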
86 tex/3_mod/4_cv.tex (new file)
@@ -0,0 +1,86 @@
+\subsection{Unified Cross-Validation and Training, Validation, and Test Sets}
+\label{unified_cv}
+
+The standard $k$-fold CV, which assumes no structure in the individual
+features of the samples, as shown in $\mat{X}$ above, is adapted to the
+ordinal character of time series data:
+A model must be evaluated on observations that occurred strictly after the
+ones used for training as, otherwise, the model would know about the future.
+Furthermore, some models predict only one or a few time steps before being
+retrained, while others predict an entire day without retraining
+(cf., Sub-section \ref{ml_models}).
+Consequently, we must use a unified time interval wherein all forecasts are
+made first before the entire interval is evaluated.
+As whole days are the longest prediction interval for models without
+retraining, we choose days as the unified time interval.
+In summary, our CV methodology yields a distinct best model per pixel and
+day to be forecast.
+Whole days are also practical for managers who commonly monitor, for
+example, the routing and thus the forecasting performance on a day-to-day
+basis.
+Our methodology assumes that the models are trained at least once per day.
+As we create operational forecasts into the near future in this paper,
+retraining all models with the latest available data is a logical step.
+
+\begin{center}
+\captionof{figure}{Training, validation, and test sets
+    during cross validation}
+\label{f:cv}
+\includegraphics[width=.8\linewidth]{static/cross_validation_gray.png}
+\end{center}
+
+The training, validation, and test sets are defined as follows.
+To exemplify the logic, we refer to Figure \ref{f:cv}, which shows the
+calendar setup (i.e., weekdays on the x-axis) for three days $T_1$, $T_2$,
+and $T_3$ (shown in dark gray) for which we generate forecasts.
+Each of these days is, by definition, a test day, and the test set
+comprises all time series, horizontal or vertical, whose last observation
+lies on that day.
+With an assumed training horizon of three weeks, the 21 days before each of
+the test days constitute the corresponding training sets (shown in lighter
+gray on the same rows as $T_1$, $T_2$, and $T_3$).
+There are two kinds of validation sets, depending on the decision to be
+made.
+First, if a forecasting method needs parameter tuning, the original
+training set is divided into as many equally long series as validation
+days are needed to find stable parameters.
+The example shows three validation days per test day named $V_n$ (shown in
+darker gray below each test day).
+The $21 - 3 = 18$ preceding days constitute the training set corresponding
+to a validation day.
+To obtain the overall validation error, the three errors are averaged.
+We call these \textit{inner} validation sets because they must be repeated
+each day to re-tune the parameters and because the involved time series
+are true subsets of the original series.
+Second, to find the best method per day and pixel, the same averaging
+logic is applied on the outer level.
+For example, if we used two validation days to find the best method for
+$T_3$, we would average the errors of $T_1$ and $T_2$ for each method and
+select the winner; then, $T_1$ and $T_2$ constitute an \textit{outer}
+validation set.
+Whereas the number of inner validation days is method-specific and must be
+chosen before generating any test day forecasts in the first place, the
+number of outer validation days may be varied after the fact and is
+determined empirically as we show in Section \ref{stu}.
+
+Our unified CV approach is also optimized for large-scale production
+settings, for example, at companies like Uber.
+As \cite{bell2018} note, there is a trade-off as to when each of the inner
+time series in the example begins.
+While the forecasting accuracy likely increases with more training days,
+supporting inner series with increasing lengths, cutting the series to the
+same length allows caching the forecasts and errors.
+In the example, $V_3$, $V_5$, and $V_7$, as well as $V_6$ and $V_8$, are
+identical despite belonging to different inner validation sets.
+Caching is also possible on the outer level when searching for an optimal
+number of validation days for model selection.
+We achieved cache hit ratios of up to 80\% in our implementation in the
+empirical study, thereby saving computational resources by the same amount.
+Lastly, we assert that our suggested CV, because it is unified around whole
+test days and uses fixed-size time series, is also suitable for creating
+consistent learning curves and, thus, answering \textbf{Q3} on the
+relationship between forecast accuracy and the amount of historic data:
+We simply increase the length of the outer training set while holding the
+test day fixed.
+Thus, independent of a method's need for parameter tuning, all methods have
+the same demand history available for each test day forecast.
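[Editor's note] The day-splitting logic above can be summarized in a short sketch. This is illustrative only; the 21-day horizon and the three inner validation days mirror the example in Figure \ref{f:cv}, and the function name is hypothetical.

import datetime as dt

def cv_splits(test_day: dt.date, n_train_days: int = 21, n_inner_val_days: int = 3):
    # Inner level: one (training days, validation day) pair per validation
    # day; each validation day is trained on the 21 - 3 = 18 days before it.
    inner = []
    inner_train_days = n_train_days - n_inner_val_days
    for i in range(1, n_inner_val_days + 1):
        val_day = test_day - dt.timedelta(days=i)
        train = [val_day - dt.timedelta(days=d) for d in range(1, inner_train_days + 1)]
        inner.append((train, val_day))
    # Outer level: the full training horizon strictly precedes the test day,
    # so no model ever sees observations from its own evaluation interval.
    outer_train = [test_day - dt.timedelta(days=d) for d in range(1, n_train_days + 1)]
    return inner, (outer_train, test_day)

Because the inner series are cut to the same length, forecasts and errors for overlapping validation days can be cached across test days, which is the source of the cache hit ratios reported above.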
87 tex/3_mod/5_mase.tex (new file)
@@ -0,0 +1,87 @@
+\subsection{Accuracy Measures}
+\label{mase}
+
+Choosing an error measure for both model selection and evaluation is not
+straightforward when working with intermittent demand, as shown, for
+example, by \cite{syntetos2005}, and one should understand the trade-offs
+between measures.
+\cite{hyndman2006} provide a study of measures with real-life data taken
+from the popular M3-competition and find that most standard measures
+degenerate under many scenarios.
+They also provide a classification scheme for which we summarize the main
+points as they apply to the UDP case:
+\begin{enumerate}
+\item \textbf{Scale-dependent Errors}:
+    The error is reported in the same unit as the raw data.
+    Two popular examples are the root mean square error (RMSE) and the
+    mean absolute error (MAE).
+    They may be used for model selection and evaluation within a pixel,
+    and are intuitively interpretable; however, they may not be used to
+    compare the error of, for example, a low-demand pixel (e.g., at the
+    UDP's service boundary) with that of a high-demand pixel (e.g.,
+    downtown).
+\item \textbf{Percentage Errors}:
+    The error is derived from the percentage errors of individual
+    forecasts per time step, and is also intuitively interpretable.
+    A popular example is the mean absolute percentage error (MAPE), which
+    is the primary measure in most forecasting competitions.
+    Whereas such errors could be applied both within and across pixels,
+    they cannot be calculated reliably for intermittent demand:
+    If only one time step exhibits no demand, the result is a
+    divide-by-zero error.
+    This often occurs even in high-demand pixels due to the slicing.
+\item \textbf{Relative Errors}:
+    A workaround is to calculate a scale-dependent error for the test day
+    and divide it by the same measure calculated with forecasts of a
+    simple benchmark method (e.g., the na\"{i}ve method).
+    An example could be
+    $\text{RelMAE} = \text{MAE} / \text{MAE}_\text{bm}$.
+    Nevertheless, even simple methods sometimes create (near-)perfect
+    forecasts, and then $\text{MAE}_\text{bm}$ becomes (close to) $0$.
+    These numerical instabilities occurred so often in our studies that we
+    argue against using such measures.
+\item \textbf{Scaled Errors}:
+    \cite{hyndman2006} contribute this category and introduce the mean
+    absolute scaled error (\gls{mase}).
+    It is defined as the MAE from the actual forecasting method on the
+    test day (i.e., "out-of-sample") divided by the MAE from the
+    (seasonal) na\"{i}ve method on the entire training set (i.e.,
+    "in-sample").
+    A MASE of $1$ indicates that a forecasting method has the same
+    accuracy on the test day as the (seasonal) na\"{i}ve method applied on
+    a longer horizon, and lower values imply higher accuracy.
+    Within a pixel, its results are identical to the ones obtained with
+    the MAE.
+    Also, we acknowledge recent publications, for example,
+    \cite{prestwich2014} or \cite{kim2016}, showing other ways of tackling
+    the difficulties mentioned.
+    However, only the MASE provided numerically stable results for all
+    forecasts in our study.
+\end{enumerate}
+Consequently, we use the MASE with a seasonal na\"{i}ve benchmark as the
+primary measure in this paper.
+With the previously introduced notation, it is defined as follows:
+$$
+\text{MASE}
+:=
+\frac{\text{MAE}_{\text{out-of-sample}}}{\text{MAE}_{\text{in-sample}}}
+=
+\frac{\text{MAE}_{\text{forecasts}}}{\text{MAE}_{\text{training}}}
+=
+\frac{\frac{1}{H} \sum_{h=1}^H |y_{T+h} - \hat{y}_{T+h}|}
+     {\frac{1}{T-k} \sum_{t=k+1}^T |y_{t} - y_{t-k}|}
+$$
+The denominator can only become $0$ if the seasonal na\"{i}ve benchmark
+makes a perfect forecast on each day in the training set except the first
+seven days, which never happened in our case study involving hundreds of
+thousands of individual model trainings.
+Further, as per the discussion in the subsequent Sub-section \ref{decomp},
+we also calculate peak-MASEs where we leave out the time steps of non-peak
+times from the calculations.
+For this analysis, we define all time steps that occur at lunch (i.e.,
+noon to 2 pm) and dinner time (i.e., 6 pm to 8 pm) as peak.
+As time steps in non-peak times typically average no or very low order
+counts, a UDP may choose not to actively forecast these at all and rather
+be interested in the accuracies of forecasting methods during peaks only.
+
+We conjecture that percentage error measures may be usable for UDPs facing
+a higher overall demand with no intra-day down-times in between but have
+to leave that to a future study.
+Yet, even with high and steady demand, divide-by-zero errors are likely to
+occur.
76 tex/3_mod/6_decomp.tex (new file)
@@ -0,0 +1,76 @@
+\subsection{Time Series Decomposition}
+\label{decomp}
+
+Concerning the time table in Figure \ref{f:timetable}, a seasonal demand
+pattern is inherent to both horizontal and vertical time series.
+First, the weekday influences whether people eat out or order in, with our
+partner receiving more orders on Thursday through Saturday than on the
+other four days.
+This pattern is part of both types of time series.
+Second, on any given day, demand peaks occur around lunch and dinner times.
+This pattern only concerns vertical series.
+Statistical analyses show that horizontally sliced time series indeed
+exhibit a periodicity of $k=7$, and vertically sliced series only yield a
+seasonal component with a regular pattern if the periodicity is set to the
+product of the number of weekdays and the daily time steps, indicating a
+distinct intra-day pattern per weekday.
+
+Figure \ref{f:stl} shows three exemplary STL decompositions for a
+$1~\text{km}^2$ pixel and a vertical time series with 60-minute time steps
+(on the x-axis) covering four weeks:
+With the noisy raw data $y_t$ on the left, the seasonal and trend
+components, $s_t$ and $t_t$, are depicted in light and dark gray for
+increasing $ns$ parameters.
+The plots include (seasonal) na\"{i}ve forecasts for the subsequent test
+day as dotted lines.
+The remainder components $r_t$ are not shown for conciseness.
+The periodicity is set to $k = 7 \times 12 = 84$ as our industry partner
+has $12$ opening hours per day.
+
+\begin{center}
+\captionof{figure}{STL decompositions for a medium-demand pixel with hourly
+    time steps and periodicity $k=84$}
+\label{f:stl}
+\includegraphics[width=.95\linewidth]{static/stl_gray.png}
+\end{center}
+
+As described in Sub-section \ref{stl}, with $k$ being implied by the
+application, at the very least, the length of the seasonal smoothing
+window, represented by the $ns$ parameter, must be calibrated by the
+forecaster:
+It controls how many past observations go into each smoothened $s_t$.
+Many practitioners, however, skip this step and set $ns$ to a large
+number, for example, $999$, then referred to as "periodic."
+For the other parameters, it is common to use the default values as
+specified in \cite{cleveland1990}.
+The goal is to find a decomposition with a regular pattern in $s_t$.
+In Figure \ref{f:stl}, this is not true for $ns=7$ where, for example, the
+four largest bars corresponding to the same time of day a week apart
+cannot be connected by an approximately straight line.
+By contrast, a regular pattern exists in the most extreme way for
+$ns=999$, where the same four largest bars are of the same height.
+This observation holds for each time step of the day.
+For $ns=11$, $s_t$ exhibits a regular pattern whose bars adapt over time:
+The pattern is regular as bars corresponding to the same time of day can
+be connected by approximately straight lines, and it is adaptive as these
+lines are not horizontal.
+The trade-off between small and large values for $ns$ can thus be
+interpreted as allowing the average demand during peak times to change
+over time:
+If demand is intermittent at non-peak times, it is reasonable to expect
+the bars to change over time as only the relative differences between peak
+and non-peak times impact the bars' heights with the seasonal component
+being centered around $0$.
+To confirm the goodness of a decomposition statistically, one way is to
+verify that $r_t$ can be modeled as a typical error process like white
+noise $\epsilon_t$.
+
+However, we suggest an alternative way of calibrating the STL method in an
+automated fashion based on our unified CV approach.
+As hinted at in Figure \ref{f:stl}, we interpret an STL decomposition as a
+forecasting method on its own by just adding the (seasonal) na\"{i}ve
+forecasts for $s_t$ and $t_t$ and predicting $0$ for $r_t$.
+Then, the $ns$ parameter is tuned just like a parameter of an ML model.
+To the best of our knowledge, this has not been proposed before.
+Conceptually, forecasting with the STL method can be viewed as a na\"{i}ve
+method with built-in smoothing, and it outperformed all other benchmark
+methods in all cases.
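[Editor's note] A minimal sketch of this idea, with statsmodels' STL as a stand-in implementation (the paper does not name its software): decompose, forecast s_t with the seasonal naive method and t_t with the naive method, and predict 0 for r_t. The $ns$ value would then be selected from a grid (e.g., 7, 11, ..., 999) by its average error on the inner validation days, just like an ML hyperparameter.

import numpy as np
from statsmodels.tsa.seasonal import STL

def stl_naive_forecast(y: np.ndarray, H: int = 12, k: int = 84, ns: int = 11) -> np.ndarray:
    # Decompose with periodicity k, then predict the H steps of the next
    # day: seasonal naive for s_t, naive (last value) for t_t, 0 for r_t.
    res = STL(y, period=k, seasonal=ns).fit()
    s_hat = res.seasonal[-k:][:H]       # repeat the seasonal cycle from one week back
    t_hat = np.full(H, res.trend[-1])   # carry the last trend value forward
    return s_hat + t_hat                # remainder r_t is predicted as 0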
|
20
tex/3_mod/7_models/1_intro.tex
Normal file
20
tex/3_mod/7_models/1_intro.tex
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
\subsection{Forecasting Models}
|
||||||
|
\label{models}
|
||||||
|
|
||||||
|
This sub-section describes the concrete models in our study.
|
||||||
|
Figure \ref{f:inputs} shows how we classify them into four families with
|
||||||
|
regard to the type of the time series, horizontal or vertical, and the
|
||||||
|
moment at which a model is trained:
|
||||||
|
Solid lines indicate that the corresponding time steps lie before the
|
||||||
|
training, and dotted lines show the time horizon predicted by a model.
|
||||||
|
For conciseness, we only show the forecasts for one test day.
|
||||||
|
The setup is the same for each inner validation day.
|
||||||
|
|
||||||
|
\
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
\captionof{figure}{Classification of the models by input type and training
|
||||||
|
moment}
|
||||||
|
\label{f:inputs}
|
||||||
|
\includegraphics[width=.95\linewidth]{static/model_inputs_gray.png}
|
||||||
|
\end{center}
|
42 tex/3_mod/7_models/2_hori.tex (new file)
@@ -0,0 +1,42 @@
+\subsubsection{Horizontal and Whole-day-ahead Forecasts.}
+\label{hori}
+
+The upper-left in Figure \ref{f:inputs} illustrates the simplest way to
+generate forecasts for a test day before it has started:
+For each time of the day, the corresponding horizontal slice becomes the
+input for a model.
+With whole days being the unified time interval, each model is trained $H$
+times, each providing a one-step-ahead forecast.
+While it is possible to select models of a different type per time step,
+that did not improve the accuracy in the empirical study.
+As the models in this family do not include the test day's demand data in
+their training sets, we see them as benchmarks to answer \textbf{Q4},
+checking whether a UDP can take advantage of real-time information.
+The models in this family are as follows; we use prefixes, such as "h"
+here, when methods are applied in other families as well:
+\begin{enumerate}
+\item \textit{\gls{naive}}:
+    Observation from the same time step one week prior
+\item \textit{\gls{trivial}}:
+    Predict $0$ for all time steps
+\item \textit{\gls{hcroston}}:
+    Intermittent demand method introduced by \cite{croston1972}
+\item \textit{\gls{hholt}},
+    \textit{\gls{hhwinters}},
+    \textit{\gls{hses}},
+    \textit{\gls{hsma}}, and
+    \textit{\gls{htheta}}:
+    Exponential smoothing without calibration
+\item \textit{\gls{hets}}:
+    ETS calibrated as described by \cite{hyndman2008b}
+\item \textit{\gls{harima}}:
+    ARIMA calibrated as described by \cite{hyndman2008a}
+\end{enumerate}
+\textit{naive} and \textit{trivial} provide an absolute benchmark for the
+actual forecasting methods.
+\textit{hcroston} is often mentioned in the context of intermittent
+demand; however, the method did not perform well at all.
+Besides \textit{hhwinters}, which always fits a seasonal component, the
+calibration heuristics behind \textit{hets} and \textit{harima} may do so
+as well.
+With $k=7$, an STL decomposition is unnecessary here.
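[Editor's note] The mechanics of this family fit in a few lines. An illustrative sketch with the seasonal naive method standing in for the per-time-of-day models (one model per row of the time table; the function name is hypothetical):

import numpy as np

def horizontal_day_forecast(time_table: np.ndarray, k: int = 7) -> np.ndarray:
    # time_table: H rows (times of day) x past days (columns); one model
    # per row, here naive: repeat the observation from one week (k=7) back.
    return np.array([float(row[-k]) for row in time_table])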
39 tex/3_mod/7_models/3_vert.tex (new file)
@@ -0,0 +1,39 @@
+\subsubsection{Vertical and Whole-day-ahead Forecasts without Retraining.}
+\label{vert}
+
+The upper-right in Figure \ref{f:inputs} shows an alternative way to
+generate forecasts for a test day before it has started:
+First, a seasonally-adjusted time series $a_t$ is obtained from a vertical
+time series by STL decomposition.
+Then, the actual forecasting model, trained on $a_t$, makes an
+$H$-step-ahead prediction.
+Lastly, we add the $H$ seasonal na\"{i}ve forecasts for the seasonal
+component $s_t$ to obtain the actual predictions for the test day.
+Thus, only one training is required per model type, and no real-time data
+are used.
+By decomposing the raw time series, all long-term patterns are assumed to
+be in the seasonal component $s_t$, and $a_t$ only contains the level with
+a potential trend and auto-correlations.
+The models in this family are:
+\begin{enumerate}
+\item \textit{\gls{fnaive}},
+    \textit{\gls{pnaive}}:
+    Sum of the na\"{i}ve forecasts of STL's trend and seasonal components
+\item \textit{\gls{vholt}},
+    \textit{\gls{vses}}, and
+    \textit{\gls{vtheta}}:
+    Exponential smoothing without calibration and seasonal fit
+\item \textit{\gls{vets}}:
+    ETS calibrated as described by \cite{hyndman2008b}
+\item \textit{\gls{varima}}:
+    ARIMA calibrated as described by \cite{hyndman2008a}
+\end{enumerate}
+As mentioned in Sub-section \ref{unified_cv}, we include the sum of the
+(seasonal) na\"{i}ve forecasts of the STL's trend and seasonal components
+as forecasts on their own:
+For \textit{fnaive}, we tune the "flexible" $ns$ parameter, and for
+\textit{pnaive}, we set it to a "periodic" value.
+Thus, we implicitly assume that there is no signal in the remainder $r_t$
+and predict $0$ for it.
+\textit{fnaive} and \textit{pnaive} are two more simple benchmarks.
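[Editor's note] A sketch of the forecast assembly in this family, with statsmodels' SimpleExpSmoothing standing in for vses; the STL usage and all parameter values are illustrative assumptions, not the study's tuned settings:

import numpy as np
from statsmodels.tsa.seasonal import STL
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

def vses_forecast(y: np.ndarray, H: int = 12, k: int = 84, ns: int = 11) -> np.ndarray:
    res = STL(y, period=k, seasonal=ns).fit()
    a = y - res.seasonal                  # seasonally-adjusted series a_t
    model = SimpleExpSmoothing(a).fit()   # SES trained once on a_t
    a_hat = model.forecast(H)             # H-step-ahead prediction
    s_hat = res.seasonal[-k:][:H]         # seasonal naive forecast of s_t
    return a_hat + s_hat                  # re-add the seasonal component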
22 tex/3_mod/7_models/4_rt.tex (new file)
@@ -0,0 +1,22 @@
+\subsubsection{Vertical and Real-time Forecasts with Retraining.}
+\label{rt}
+
+The lower-left in Figure \ref{f:inputs} shows how models trained on
+vertical time series are extended with real-time order data as they become
+available during a test day:
+Instead of obtaining an $H$-step-ahead forecast, we retrain a model after
+every time step and only predict one step.
+The remainder is as in the previous sub-section, and the models are:
+\begin{enumerate}
+\item \textit{\gls{rtholt}},
+    \textit{\gls{rtses}}, and
+    \textit{\gls{rttheta}}:
+    Exponential smoothing without calibration and seasonal fit
+\item \textit{\gls{rtets}}:
+    ETS calibrated as described by \cite{hyndman2008b}
+\item \textit{\gls{rtarima}}:
+    ARIMA calibrated as described by \cite{hyndman2008a}
+\end{enumerate}
+Retraining \textit{fnaive} and \textit{pnaive} did not increase accuracy,
+so we left them out.
+A downside of this family is the significant increase in computing costs.
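[Editor's note] The retraining loop can be sketched as follows; illustrative only, with SimpleExpSmoothing standing in for any of the listed methods, and the seasonal component re-added afterwards exactly as in the previous family:

import numpy as np
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

def realtime_day_forecast(a_history: np.ndarray, a_test_day: np.ndarray) -> np.ndarray:
    # a_history: seasonally-adjusted series up to the test day;
    # a_test_day: the H observations arriving during the test day.
    preds, series = [], a_history.copy()
    for actual in a_test_day:
        fit = SimpleExpSmoothing(series).fit()   # retrain on all data so far
        preds.append(float(fit.forecast(1)[0]))  # predict one step only
        series = np.append(series, actual)       # the real-time observation arrives
    return np.array(preds)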
54 tex/3_mod/7_models/5_ml.tex (new file)
@@ -0,0 +1,54 @@
+\subsubsection{Vertical and Real-time Forecasts without Retraining.}
+\label{ml_models}
+
+The lower-right in Figure \ref{f:inputs} shows how ML models take
+real-time order data into account without retraining.
+Based on the seasonally-adjusted time series $a_t$, we employ the feature
+matrix and label vector representations from Sub-section \ref{learning}
+and set $n$ to the number of daily time steps, $H$, to cover all potential
+auto-correlations.
+The ML models are trained once before a test day starts.
+For training, the matrix and vector are populated such that $y_T$ is set
+to the last time step of the day before the forecasts, $a_T$.
+As the splitting during CV is done with whole days, the \gls{ml} models
+are trained with training sets consisting of samples from all times of a
+day in an equal manner.
+Thus, the ML models learn to predict each time of the day.
+For prediction on a test day, the $H$ observations preceding the time step
+to be forecast are used as the input vector after seasonal adjustment.
+As a result, real-time data are included.
+The models in this family are:
+\begin{enumerate}
+\item \textit{\gls{vrfr}}: RF trained on the matrix as described
+\item \textit{\gls{vsvr}}: SVR trained on the matrix as described
+\end{enumerate}
+We tried other ML models, such as gradient boosting machines, but found
+only RFs and SVRs to perform well in our study.
+In the case of gradient boosting machines, this is to be expected as they
+are known not to perform well in the presence of high noise, as is natural
+with low count data, as shown, for example, by \cite{ma2018} or
+\cite{mason2000}.
+Also, deep learning methods are not applicable as the feature matrices
+only consist of several hundred to thousands of rows (cf., Sub-section
+\ref{params}).
+In \ref{tabular_ml_models}, we provide an alternative feature matrix
+representation that exploits the two-dimensional structure of time tables
+without decomposing the time series.
+In \ref{enhanced_feats}, we show how feature matrices are extended to
+include predictors other than historical order data.
+However, to answer \textbf{Q5} already here, none of the external data
+sources improves the results in our study.
+Due to the high number of time series in our study, investigating why no
+external source improves the forecasts requires an automated approach to
+analyzing individual time series.
+\cite{barbour2014} provide a spectral density estimation approach, called
+the Shannon entropy, that measures the signal-to-noise ratio in a database
+as a number normalized between 0 and 1, where lower values indicate a
+higher signal-to-noise ratio.
+We then look at daily averages of the estimates per pixel and find that
+including any of the external data sources from \ref{enhanced_feats}
+always leads to significantly lower signal-to-noise ratios.
+Thus, we conclude that, at least for the demand faced by our industry
+partner, the historical data contain all of the signal.
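[Editor's note] A sketch of the lag-matrix construction and the train-once, predict-in-real-time workflow, with scikit-learn estimators standing in for vrfr and vsvr; the toy series and hyperparameters are illustrative, not the tuned values from the study:

import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

def lag_matrix(a: np.ndarray, n: int):
    # Each row maps the n previous observations of a_t to the next one.
    X = np.array([a[i - n:i] for i in range(n, len(a))])
    return X, a[n:]

a = np.random.default_rng(42).poisson(3.0, size=28 * 12).astype(float)  # toy a_t, 4 weeks
X, y = lag_matrix(a, n=12)                 # n = H = 12 daily time steps
rf = RandomForestRegressor(n_estimators=100, random_state=0).fit(X, y)   # vrfr analog
svr = SVR(kernel="rbf", C=1.0).fit(X, y)                                 # vsvr analog
# Real-time prediction: the last H (seasonally adjusted) observations form
# the input vector for the next time step -- no retraining needed.
next_step = rf.predict(a[-12:].reshape(1, -1))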
tex/4_stu/1_intro.tex
@@ -1,2 +1,5 @@
 \section{Empirical Study: A Meal Delivery Platform in Europe}
 \label{stu}
+
+% temporary placeholder
+\label{params}
54 tex/apx/case_study.tex (new file)
@@ -0,0 +1,54 @@
+\section{Raw Order Data in the Case Study}
+\label{dataset}
+
+The raw data for the empirical study in Section \ref{stu} were provided by
+a meal delivery platform operating in five cities in France in 2016.
+The platform received a total of 686,385 orders distributed as follows:
+
+\
+\begin{center}
+\begin{tabular}{llr}
+\hline
+\thead{City} & \thead{Launch Day} & \thead{Orders} \\
+\hline
+Bordeaux & July 18 & 64,012 \\
+Lille & October 30 & 14,362 \\
+Lyon & February 21 & 214,635 \\
+Nantes & October 31 & 12,900 \\
+Paris & March 7 & 380,476 \\
+\end{tabular}
+\end{center}
+\
+
+The part of the database relevant for forecasting can be thought of as one
+table per city, where each row represents one order and consists of the
+following groups of columns:
+\begin{enumerate}
+\item \textbf{Restaurant Data}
+    \begin{enumerate}
+    \item unique ID and name
+    \item pickup location as latitude-longitude pair
+    \end{enumerate}
+\item \textbf{Customer Data}
+    \begin{enumerate}
+    \item unique ID, name, and phone number
+    \item delivery location as latitude-longitude pair (mostly physical
+        addresses but also public spots)
+    \end{enumerate}
+\item \textbf{Timestamps}
+    \begin{enumerate}
+    \item placement via the smartphone app
+    \item fulfillment workflow (pickup, delivery, cancellation,
+        re-deliveries)
+    \end{enumerate}
+\item \textbf{Courier Data}
+    \begin{enumerate}
+    \item unique ID, name, and phone number
+    \item shift data (begin, breaks, end)
+    \item average speed
+    \end{enumerate}
+\item \textbf{Order Details}
+    \begin{enumerate}
+    \item meals and drinks
+    \item prices and discounts granted
+    \end{enumerate}
+\end{enumerate}
121 tex/apx/enhanced_feats.tex (new file)
@@ -0,0 +1,121 @@
+\section{Enhancing Forecasting Models with External Data}
+\label{enhanced_feats}
+
+In this appendix, we show how the feature matrix in Sub-section
+\ref{ml_models} can be extended with features other than historical order
+data.
+Then, we provide an overview of the external data we tried out as
+predictors in our empirical study.
+
+\subsection{Enhanced Feature Matrices}
+
+Feature matrices can naturally be extended by appending new feature
+columns $x_{t,f}$ or $x_f$ on the right, where the former represent
+predictors changing throughout a day while the latter are static either
+within a pixel or across a city.
+$f$ refers to an external predictor variable, such as one of the examples
+listed below.
+In the SVR case, the columns should be standardized before fitting as
+external predictors are most likely on a different scale than the historic
+order data.
+Thus, for a matrix with seasonally-adjusted order data $a_t$ in it, an
+enhanced matrix looks as follows:
+
+$$
+\vec{y}
+=
+\begin{pmatrix}
+a_T \\
+a_{T-1} \\
+\dots \\
+a_{H+1}
+\end{pmatrix}
+~~~~~
+\mat{X}
+=
+\begin{bmatrix}
+a_{T-1} & a_{T-2} & \dots & a_{T-H} & ~~~
+    & x_{T,A} & \dots & x_{B} & \dots \\
+a_{T-2} & a_{T-3} & \dots & a_{T-(H+1)} & ~~~
+    & x_{T-1,A} & \dots & x_{B} & \dots \\
+\dots & \dots & \dots & \dots & ~~~
+    & \dots & \dots & \dots & \dots \\
+a_H & a_{H-1} & \dots & a_1 & ~~~
+    & x_{H+1,A} & \dots & x_{B} & \dots
+\end{bmatrix}
+$$
+\
+
+Similarly, we can also enhance the tabular matrices from
+\ref{tabular_ml_models}.
+The same comments as for their pure equivalents in Sub-section
+\ref{ml_models} apply, in particular, that ML models trained with an
+enhanced matrix can process real-time data without being retrained.
+
+\subsection{External Data in the Empirical Study}
+\label{external_data}
+
+In the empirical study, we tested four groups of external features that we
+briefly describe here.
+
+\vskip 0.1in
+
+\textbf{Calendar Features}:
+\begin{itemize}
+\item Time of day (as synthesized integers: e.g., 1,050 for 10:30 am, or
+    1,600 for 4 pm)
+\item Day of week (as one-hot encoded booleans)
+\item Work day or not (as booleans)
+\end{itemize}
+
+\vskip 0.1in
+
+\textbf{Features Derived from the Historical Order Data}:
+\begin{itemize}
+\item Number of pre-orders for a time step (as integers)
+\item 7-day SMA of the percentages of discounted orders (as percentages):
+    The platform is known for running marketing campaigns aimed at
+    first-time customers at irregular intervals. Consequently, the order
+    data show a wave-like pattern of coupons redeemed when looking at the
+    relative share of discounted orders per day.
+\end{itemize}
+
+\vskip 0.1in
+
+\textbf{Neighborhood Features}:
+\begin{itemize}
+\item Ambient population (as integers) as obtained from the ORNL LandScan
+    database
+\item Number of active platform restaurants (as integers)
+\item Number of overall restaurants, food outlets, retailers, and other
+    businesses (as integers) as obtained from the Google Maps and Yelp
+    web services
+\end{itemize}
+
+\vskip 0.1in
+
+\textbf{Real-time Weather} (raw data obtained from IBM's Wunderground
+database):
+\begin{itemize}
+\item Absolute temperature, wind speed, and humidity (as decimals and
+    percentages)
+\item Relative temperature with respect to 3-day and 7-day historical
+    means (as decimals)
+\item Day vs. night defined by sunset (as booleans)
+\item Summarized description (as indicators $-1$, $0$, and $+1$)
+\item Lags of the absolute temperature and the summaries covering the
+    previous three hours
+\end{itemize}
+
+\vskip 0.1in
+
+Unfortunately, we must report that none of the mentioned external data
+improved the accuracy of the forecasts.
+Some led to models overfitting the data, which could not be regulated.
+Manual tests revealed that real-time weather data are the most promising
+external source.
+Nevertheless, the data provided by IBM's Wunderground database originate
+from weather stations close to airports, which implies that we only have
+the same aggregate weather data for the entire city.
+If weather data become available on a more granular basis in the future,
+we see some potential for exploitation.
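[Editor's note] A sketch of an enhanced matrix and the standardization advised above for the SVR; the external columns and all values are synthetic placeholders for the feature groups listed in this appendix:

import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

rng = np.random.default_rng(0)
n_rows, H = 300, 12
X_lags = rng.poisson(3.0, size=(n_rows, H)).astype(float)  # a_t lag columns
x_weather = rng.normal(15.0, 5.0, size=(n_rows, 1))        # x_{t,f}: changes per time step
x_population = np.full((n_rows, 1), 2750.0)                # x_f: static within the pixel
X = np.hstack([X_lags, x_weather, x_population])           # append external columns on the right
y = rng.poisson(3.0, size=n_rows).astype(float)

# Standardization puts lags and external predictors on one scale for the SVR.
svr = make_pipeline(StandardScaler(), SVR(kernel="rbf", C=1.0)).fit(X, y)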
261 tex/apx/peak_results.tex (new file)
@@ -0,0 +1,261 @@
|
||||||
|
\section{Forecasting Accuracies during Peak Times}
|
||||||
|
\label{peak_results}
|
||||||
|
|
||||||
|
This appendix shows all result tables from the main text with the MASE
|
||||||
|
averages calculated from time steps within peak times.
|
||||||
|
Peaks are the times of the day where the typical customer has a lunch or
|
||||||
|
dinner meal and defined to be either from 12 pm to 2 pm or from 6 pm to
|
||||||
|
8 pm.
|
||||||
|
While the exact decimals of the MASEs differ from the ones in the main
|
||||||
|
text, the relative ranks of the forecasting methods are the same except in
|
||||||
|
rare cases.
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
\captionof{table}{Top-3 models by training weeks and average demand
|
||||||
|
($1~\text{km}^2$ pixel size, 60-minute time steps)}
|
||||||
|
\label{t:results:a}
|
||||||
|
\begin{tabular}{|c|c|*{12}{c|}}
|
||||||
|
|
||||||
|
\hline
|
||||||
|
\multirow{3}{*}{\rotatebox{90}{\thead{Training}}}
|
||||||
|
& \multirow{3}{*}{\rotatebox{90}{\thead{Rank}}}
|
||||||
|
& \multicolumn{3}{c|}{\thead{No Demand}}
|
||||||
|
& \multicolumn{3}{c|}{\thead{Low Demand}}
|
||||||
|
& \multicolumn{3}{c|}{\thead{Medium Demand}}
|
||||||
|
& \multicolumn{3}{c|}{\thead{High Demand}} \\
|
||||||
|
~ & ~
|
||||||
|
& \multicolumn{3}{c|}{(0 - 2.5)}
|
||||||
|
& \multicolumn{3}{c|}{(2.5 - 10)}
|
||||||
|
& \multicolumn{3}{c|}{(10 - 25)}
|
||||||
|
& \multicolumn{3}{c|}{(25 - $\infty$)} \\
|
||||||
|
\cline{3-14}
|
||||||
|
~ & ~
|
||||||
|
& Method & MASE & $n$
|
||||||
|
& Method & MASE & $n$
|
||||||
|
& Method & MASE & $n$
|
||||||
|
& Method & MASE & $n$ \\
|
||||||
|
|
||||||
|
\hline \hline
|
||||||
|
\multirow{3}{*}{3} & 1
|
||||||
|
& \textbf{\textit{trivial}}
|
||||||
|
& 0.794 & \multirow{3}{*}{\rotatebox{90}{4586}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.817 & \multirow{3}{*}{\rotatebox{90}{2975}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.838 & \multirow{3}{*}{\rotatebox{90}{2743}}
|
||||||
|
& \textbf{\textit{rtarima}}
|
||||||
|
& 0.871 & \multirow{3}{*}{\rotatebox{90}{2018}} \\
|
||||||
|
~ & 2
|
||||||
|
& \textit{hsma} & 0.808 & ~
|
||||||
|
& \textit{hses} & 0.847 & ~
|
||||||
|
& \textit{hses} & 0.851 & ~
|
||||||
|
& \textit{rtses} & 0.872 & ~ \\
|
||||||
|
~ & 3
|
||||||
|
& \textit{pnaive} & 0.938 & ~
|
||||||
|
& \textit{hets} & 0.848 & ~
|
||||||
|
& \textit{hets} & 0.853 & ~
|
||||||
|
& \textit{rtets} & 0.874 & ~ \\
|
||||||
|
|
||||||
|
\hline
|
||||||
|
\multirow{3}{*}{4} & 1
|
||||||
|
& \textbf{\textit{trivial}}
|
||||||
|
& 0.791 & \multirow{3}{*}{\rotatebox{90}{4532}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.833 & \multirow{3}{*}{\rotatebox{90}{3033}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.839 & \multirow{3}{*}{\rotatebox{90}{2687}}
|
||||||
|
& \textbf{\textit{vrfr}}
|
||||||
|
& 0.848 & \multirow{3}{*}{\rotatebox{90}{2016}} \\
|
||||||
|
~ & 2
|
||||||
|
& \textit{hsma} & 0.794 & ~
|
||||||
|
& \textit{hses} & 0.838 & ~
|
||||||
|
& \textit{hses} & 0.847 & ~
|
||||||
|
& \textbf{\textit{rtarima}} & 0.851 & ~ \\
|
||||||
|
~ & 3
|
||||||
|
& \textit{pnaive} & 0.907 & ~
|
||||||
|
& \textit{hets} & 0.841 & ~
|
||||||
|
& \textit{hets} & 0.851 & ~
|
||||||
|
& \textit{rtses} & 0.857 & ~ \\
|
||||||
|
|
||||||
|
\hline
|
||||||
|
\multirow{3}{*}{5} & 1
|
||||||
|
& \textbf{\textit{trivial}}
|
||||||
|
& 0.782 & \multirow{3}{*}{\rotatebox{90}{4527}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.844 & \multirow{3}{*}{\rotatebox{90}{3055}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.841 & \multirow{3}{*}{\rotatebox{90}{2662}}
|
||||||
|
& \textbf{\textit{vrfr}}
|
||||||
|
& 0.849 & \multirow{3}{*}{\rotatebox{90}{2019}} \\
|
||||||
|
~ & 2
|
||||||
|
& \textit{hsma} & 0.802 & ~
|
||||||
|
& \textit{hses} & 0.851 & ~
|
||||||
|
& \textit{hets} & 0.844 & ~
|
||||||
|
& \textbf{\textit{rtarima}} & 0.851 & ~ \\
|
||||||
|
~ & 3
|
||||||
|
& \textit{pnaive} & 0.888 & ~
|
||||||
|
& \textit{hets} & 0.863 & ~
|
||||||
|
& \textit{hses} & 0.845 & ~
|
||||||
|
& \textit{vsvr} & 0.853 & ~ \\
|
||||||
|
|
||||||
|
\hline
|
||||||
|
\multirow{3}{*}{6} & 1
|
||||||
|
& \textbf{\textit{trivial}}
|
||||||
|
& 0.743 & \multirow{3}{*}{\rotatebox{90}{4470}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.843 & \multirow{3}{*}{\rotatebox{90}{3086}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.841 & \multirow{3}{*}{\rotatebox{90}{2625}}
|
||||||
|
& \textbf{\textit{vrfr}}
|
||||||
|
& 0.844 & \multirow{3}{*}{\rotatebox{90}{2025}} \\
|
||||||
|
~ & 2
|
||||||
|
& \textit{hsma} & 0.765 & ~
|
||||||
|
& \textit{hses} & 0.853 & ~
|
||||||
|
& \textit{hses} & 0.844 & ~
|
||||||
|
& \textbf{\textit{hets}} & 0.847 & ~ \\
|
||||||
|
~ & 3
|
||||||
|
& \textit{pnaive} & 0.836 & ~
|
||||||
|
& \textit{hets} & 0.861 & ~
|
||||||
|
& \textit{hets} & 0.844 & ~
|
||||||
|
& \textit{vsvr} & 0.849 & ~ \\
|
||||||
|
|
||||||
|
\hline
|
||||||
|
\multirow{3}{*}{7} & 1
|
||||||
|
& \textbf{\textit{trivial}}
|
||||||
|
& 0.728 & \multirow{3}{*}{\rotatebox{90}{4454}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.855 & \multirow{3}{*}{\rotatebox{90}{3132}}
|
||||||
|
& \textbf{\textit{hets}}
|
||||||
|
& 0.843 & \multirow{3}{*}{\rotatebox{90}{2597}}
|
||||||
|
& \textbf{\textit{hets}}
|
||||||
|
& 0.839 & \multirow{3}{*}{\rotatebox{90}{2007}} \\
|
||||||
|
~ & 2
|
||||||
|
& \textit{hsma} & 0.744 & ~
|
||||||
|
& \textit{hses} & 0.862 & ~
|
||||||
|
& \textit{hsma} & 0.845 & ~
|
||||||
|
& \textbf{\textit{vrfr}} & 0.842 & ~ \\
|
||||||
|
~ & 3
|
||||||
|
& \textit{pnaive} & 0.812 & ~
|
||||||
|
& \textit{hets} & 0.868 & ~
|
||||||
|
& \textbf{\textit{vsvr}} & 0.849 & ~
|
||||||
|
& \textit{vsvr} & 0.846 & ~ \\
|
||||||
|
|
||||||
|
\hline
|
||||||
|
\multirow{3}{*}{8} & 1
|
||||||
|
& \textbf{\textit{trivial}}
|
||||||
|
& 0.736 & \multirow{3}{*}{\rotatebox{90}{4402}}
|
||||||
|
& \textbf{\textit{hsma}}
|
||||||
|
& 0.865 & \multirow{3}{*}{\rotatebox{90}{3159}}
|
||||||
|
& \textbf{\textit{hets}}
|
||||||
|
& 0.843 & \multirow{3}{*}{\rotatebox{90}{2575}}
|
||||||
|
& \textbf{\textit{hets}}
|
||||||
|
& 0.837 & \multirow{3}{*}{\rotatebox{90}{2002}} \\
|
||||||
|
~ & 2
|
||||||
|
& \textit{hsma} & 0.759 & ~
|
||||||
|
& \textit{hets} & 0.874 & ~
|
||||||
|
& \textbf{\textit{vsvr}} & 0.848 & ~
|
||||||
|
& \textbf{\textit{vrfr}} & 0.841 & ~ \\
|
||||||
|
~ & 3
|
||||||
|
& \textit{pnaive} & 0.820 & ~
|
||||||
|
& \textit{hses} & 0.879 & ~
|
||||||
|
& \textit{hsma} & 0.850 & ~
|
||||||
|
& \textit{vsvr} & 0.847 & ~ \\
|
||||||
|
|
||||||
|
\hline
|
||||||
|
\end{tabular}
|
||||||
|
\end{center}
|
||||||
|
|
||||||
|
\begin{center}
|
||||||
|
\captionof{table}{Ranking of benchmark and horizontal models
|
||||||
|
($1~\text{km}^2$ pixel size, 60-minute time steps):
|
||||||
|
the table shows the ranks for cases with $2.5 < ADD < 25$
|
||||||
|
(and $25 < ADD < \infty$ in parentheses if they differ)}
|
||||||
|
\label{t:hori:a}
|
||||||
|
\begin{tabular}{|c|ccc|cccccccc|}
|
||||||
|
\hline
|
||||||
|
\multirow{2}{*}{\rotatebox{90}{\thead{\scriptsize{Training}}}}
|
||||||
|
& \multicolumn{3}{c|}{\thead{Benchmarks}}
|
||||||
|
& \multicolumn{8}{c|}{\thead{Horizontal (whole-day-ahead)}} \\
|
||||||
|
\cline{2-12}
|
||||||
|
~ & \textit{naive} & \textit{fnaive} & \textit{paive}
|
||||||
|
& \textit{harima} & \textit{hcroston} & \textit{hets} & \textit{hholt}
|
||||||
|
& \textit{hhwinters} & \textit{hses} & \textit{hsma} & \textit{htheta} \\
|
||||||
|
\hline \hline
|
||||||
|
3 & 11 & 7 (2) & 8 (5) & 5 (7) & 4 & 3
|
||||||
|
& 9 (10) & 10 (9) & 2 (6) & 1 & 6 (8) \\
|
||||||
|
4 & 11 & 7 (2) & 8 (3) & 5 (6) & 4 (5) & 3 (1)
|
||||||
|
& 9 (10) & 10 (9) & 2 (8) & 1 (4) & 6 (7) \\
|
||||||
|
5 & 11 & 7 (2) & 8 (4) & 5 (3) & 4 (9) & 3 (1)
|
||||||
|
& 9 (10) & 10 (5) & 2 (8) & 1 (6) & 6 (7) \\
|
||||||
|
6 & 11 & 8 (5) & 9 (6) & 5 (4) & 4 (7) & 2 (1)
|
||||||
|
& 10 & 7 (2) & 3 (8) & 1 (9) & 6 (3) \\
|
||||||
|
7 & 11 & 8 (5) & 10 (6) & 5 (4) & 4 (7) & 2 (1)
|
||||||
|
& 9 (10) & 7 (2) & 3 (8) & 1 (9) & 6 (3) \\
|
||||||
|
8 & 11 & 9 (5) & 10 (6) & 5 (4) & 4 (7) & 2 (1)
|
||||||
|
& 8 (10) & 7 (2) & 3 (8) & 1 (9) & 6 (3) \\
|
||||||
|
\hline
|
||||||
|
\end{tabular}
|
||||||
|
\end{center}
|
||||||
|
\
|
||||||
|
|
||||||
|
\begin{center}
\captionof{table}{Ranking of classical models on vertical time series
($1~\text{km}^2$ pixel size, 60-minute time steps):
the table shows the ranks for cases with $2.5 < ADD < 25$
(and $25 < ADD < \infty$ in parentheses if they differ)}
\label{t:vert:a}
\begin{tabular}{|c|cc|ccccc|ccccc|}
\hline
\multirow{2}{*}{\rotatebox{90}{\thead{\scriptsize{Training}}}}
& \multicolumn{2}{c|}{\thead{Benchmarks}}
& \multicolumn{5}{c|}{\thead{Vertical (whole-day-ahead)}}
& \multicolumn{5}{c|}{\thead{Vertical (real-time)}} \\
\cline{2-13}
~ & \textit{hets} & \textit{hsma} & \textit{varima} & \textit{vets}
& \textit{vholt} & \textit{vses} & \textit{vtheta} & \textit{rtarima}
& \textit{rtets} & \textit{rtholt} & \textit{rtses} & \textit{rttheta} \\
\hline \hline
3 & 2 (10) & 1 (7) & 6 (4) & 8 (6) & 10 (9)
& 7 (5) & 11 (12) & 4 (1) & 5 (3) & 9 (8) & 3 (2) & 12 (11) \\
4 & 2 (7) & 1 (10) & 6 (4) & 8 (6) & 10 (9)
& 7 (5) & 12 (11) & 3 (1) & 5 (3) & 9 (8) & 4 (2) & 11 (12) \\
5 & 2 (3) & 1 (10) & 7 (5) & 8 (7) & 10 (9)
& 6 & 11 & 4 (1) & 5 (4) & 9 (8) & 3 (2) & 12 \\
6 & 2 (1) & 1 (10) & 6 (5) & 8 (7) & 10 (9)
& 7 (6) & 11 (12) & 3 (2) & 5 (4) & 9 (8) & 4 (3) & 12 (11) \\
7 & 2 (1) & 1 (10) & 8 (5) & 7 & 10 (9)
& 6 & 11 (12) & 5 (2) & 4 & 9 (8) & 3 & 12 (11) \\
8 & 2 (1) & 1 (9) & 8 (5) & 7 & 10 (8)
& 6 & 12 (10) & 5 (2) & 4 & 9 (6) & 3 & 11 \\
\hline
\end{tabular}
\end{center}

\

\pagebreak

\begin{center}
\captionof{table}{Ranking of ML models on vertical time series
($1~\text{km}^2$ pixel size, 60-minute time steps):
the table shows the ranks for cases with $2.5 < ADD < 25$
(and $25 < ADD < \infty$ in parentheses if they differ)}
\label{t:ml:a}
\begin{tabular}{|c|cccc|cc|}
\hline
\multirow{2}{*}{\rotatebox{90}{\thead{\scriptsize{Training}}}}
& \multicolumn{4}{c|}{\thead{Benchmarks}}
& \multicolumn{2}{c|}{\thead{ML}} \\
\cline{2-7}
~ & \textit{fnaive} & \textit{hets} & \textit{hsma}
& \textit{rtarima} & \textit{vrfr} & \textit{vsvr} \\
\hline \hline
3 & 6 & 2 (5) & 1 (3) & 3 (1) & 5 (2) & 4 \\
4 & 6 (5) & 2 (3) & 1 (6) & 3 (2) & 5 (1) & 4 \\
5 & 6 (5) & 2 (4) & 1 (6) & 4 (2) & 5 (1) & 3 \\
6 & 6 (5) & 2 & 1 (6) & 4 & 5 (1) & 3 \\
7 & 6 (5) & 2 (1) & 1 (6) & 4 & 5 (2) & 3 \\
8 & 6 (5) & 2 (1) & 1 (6) & 4 & 5 (2) & 3 \\
\hline
\end{tabular}
\end{center}

\

58
tex/apx/tabular_ml_models.tex
Normal file

@ -0,0 +1,58 @@
\section{Tabular and Real-time Forecasts without Retraining}
\label{tabular_ml_models}

As an alternative to the feature matrix for the ML models in Sub-section
\ref{ml_models}, we describe an approach that works without the STL
method.
Instead of decomposing a time series and arranging the resulting
seasonally-adjusted time series $a_t$ into a matrix $\mat{X}$, one can
create a matrix with two types of feature columns mapped to the raw
observations in $\vec{y}$:
the first group of columns takes all observations at the same time of day
over a horizon of, for example, one week ($n_h = 7$), while the second
group takes all observations within a pre-defined time window, for
example, $3$ hours ($n_r = 3$ for 60-minute time steps), preceding the
time step to be fitted.
Thus, we exploit the two-dimensional structure of time tables as well and
conceptually model historical and recent demand.
The alternative feature matrix looks as follows, where the left block
holds the historical and the right block the recent demand features:

$$
\vec{y}
=
\begin{pmatrix}
y_T \\
y_{T-1} \\
\vdots \\
y_{1+n_hH}
\end{pmatrix}
~~~~~
\mat{X}
=
\begin{bmatrix}
y_{T-H} & y_{T-2H} & \dots & y_{T-n_hH}
& y_{T-1} & y_{T-2} & \dots & y_{T-n_r} \\
y_{T-1-H} & y_{T-1-2H} & \dots & y_{T-1-n_hH}
& y_{T-2} & y_{T-3} & \dots & y_{T-n_r-1} \\
\vdots & \vdots & \ddots & \vdots
& \vdots & \vdots & \ddots & \vdots \\
y_{1+(n_h-1)H} & y_{1+(n_h-2)H} & \dots & y_1
& y^*_{1+n_hH-1} & y^*_{1+n_hH-2} & \dots & y^*_{1+n_hH-n_r}
\end{bmatrix}
$$

\

As a minor detail, we note that recent demand features that would reach
back into the previous day are set to $0$, as indicated by the $^*$
notation above.
This alignment of the undecomposed order data $y_t$ ensures that the ML
models learn the two seasonal patterns independently.
The parameters $n_h$ and $n_r$ must be adapted to the data, but we found
the above values to work well.

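To make the construction concrete, the following minimal Python sketch
builds $\mat{X}$ and $\vec{y}$ from a raw demand series; it is an
illustration under our own naming and defaults
(\texttt{tabular\_features}, $H = 24$ steps per day), not the code used
in the study:

\begin{verbatim}
import numpy as np

def tabular_features(y, H=24, n_h=7, n_r=3):
    # y: 1-d array of demand counts, oldest first (y[0] corresponds to y_1)
    # H: time steps per day (24 for 60-minute steps); n_h, n_r as in the text
    T = len(y)
    rows, targets = [], []
    for t in range(n_h * H, T):  # first target corresponds to y_{1+n_h*H}
        hist = [y[t - k * H] for k in range(1, n_h + 1)]
        # zero out recent features that would fall on the previous day
        recent = [y[t - k] if (t - k) // H == t // H else 0
                  for k in range(1, n_r + 1)]
        rows.append(hist + recent)
        targets.append(y[t])
    # rows are ordered oldest target first; the matrix above lists them
    # newest first, which is irrelevant for model fitting
    return np.array(rows), np.array(targets)
\end{verbatim}

The returned pair can be passed directly to any standard regressor, for
example scikit-learn's \texttt{RandomForestRegressor().fit(X, y)}.
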
As such matrices resemble time tables, we refer to them as tabular.
However, we found the ML models trained on vertical time series to
outperform the tabular ML models, which is why we disregarded the latter
in the study.
This tabular form could still be beneficial for UDPs whose demand exhibits
a weaker seasonality, such as meal delivery platforms.
102
tex/glossary.tex

@ -5,6 +5,9 @@
\newglossaryentry{cv}{
name=CV, description={Cross Validation}
}
\newglossaryentry{mase}{
name=MASE, description={Mean Absolute Scaled Error}
}
\newglossaryentry{ml}{
name=ML, description={Machine Learning}
}

@ -27,4 +30,103 @@
name=VRP, description={Vehicle Routing Problem}
}

% Model names.
\newglossaryentry{naive}{
name=naive, description={(Seasonal) Na\"{i}ve Method}
}
\newglossaryentry{fnaive}{
name=fnaive, description={"Flexible" STL Decomposition,
with tuned ns parameter}
}
\newglossaryentry{pnaive}{
name=pnaive, description={"Periodic" STL Decomposition,
with ns parameter set to a large number}
}
\newglossaryentry{trivial}{
name=trivial, description={Trivial Method}
}
\newglossaryentry{hcroston}{
name=hcroston, description={Croston's Method,
trained on horizontal time series}
}
\newglossaryentry{hholt}{
name=hholt, description={Holt's Linear Trend Method,
trained on horizontal time series}
}
\newglossaryentry{vholt}{
name=vholt, description={Holt's Linear Trend Method,
trained on vertical time series}
}
\newglossaryentry{rtholt}{
name=rtholt, description={Holt's Linear Trend Method,
(re)trained on vertical time series}
}
\newglossaryentry{hhwinters}{
name=hhwinters, description={Holt-Winters' Seasonal Method,
trained on horizontal time series}
}
\newglossaryentry{hses}{
name=hses, description={Simple Exponential Smoothing Method,
trained on horizontal time series}
}
\newglossaryentry{vses}{
name=vses, description={Simple Exponential Smoothing Method,
trained on vertical time series}
}
\newglossaryentry{rtses}{
name=rtses, description={Simple Exponential Smoothing Method,
(re)trained on vertical time series}
}
\newglossaryentry{hsma}{
name=hsma, description={Simple Moving Average Method,
trained on horizontal time series}
}
\newglossaryentry{htheta}{
name=htheta, description={Theta Method,
trained on horizontal time series}
}
\newglossaryentry{vtheta}{
name=vtheta, description={Theta Method,
trained on vertical time series}
}
\newglossaryentry{rttheta}{
name=rttheta, description={Theta Method,
(re)trained on vertical time series}
}
\newglossaryentry{hets}{
name=hets, description={ETS State Space Method,
trained on horizontal time series}
}
\newglossaryentry{vets}{
name=vets, description={ETS State Space Method,
trained on vertical time series}
}
\newglossaryentry{rtets}{
name=rtets, description={ETS State Space Method,
(re)trained on vertical time series}
}
\newglossaryentry{harima}{
name=harima, description={Autoregressive Integrated Moving Average Method,
trained on horizontal time series}
}
\newglossaryentry{varima}{
name=varima, description={Autoregressive Integrated Moving Average Method,
trained on vertical time series}
}
\newglossaryentry{rtarima}{
name=rtarima, description={Autoregressive Integrated Moving Average Method,
(re)trained on vertical time series}
}
\newglossaryentry{vrfr}{
name=vrfr, description={Random Forest Regression Method,
trained on vertical time series}
}
\newglossaryentry{vsvr}{
name=vsvr, description={Support Vector Regression Method,
trained on vertical time series}
}

\printglossaries
@ -4,6 +4,12 @@
\usepackage[acronym]{glossaries}
\makeglossaries

% Enable captions for figures and tables.
\usepackage{caption}

% Enable diagonal lines in tables.
\usepackage{static/slashbox}

% Make opening quotes look different than closing quotes.
\usepackage[english=american]{csquotes}
\MakeOuterQuote{"}

@ -26,6 +26,16 @@ volume={1},
pages={461--466}
}

@article{barbour2014,
title={psd: Adaptive, sine multitaper power spectral density estimation for R},
author={Barbour, Andrew J and Parker, Robert L},
year={2014},
journal={Computers \& Geosciences},
volume={63},
pages={1--8},
publisher={Elsevier}
}

@misc{bell2018,
title = {Forecasting at Uber: An Introduction},
author={Bell, Franziska and Smyl, Slawek},
@ -139,6 +149,16 @@ number={1},
pages={3--73}
}

@article{croston1972,
title={Forecasting and Stock Control for Intermittent Demands},
author={Croston, J D},
year={1972},
journal={Journal of the Operational Research Society},
volume={23},
number={3},
pages={289--303}
}

@book{dagum2016,
title={Seasonal Adjustment Methods and Real Time Trend-Cycle Estimation},
author={Dagum, Estela and Bianconcini, Silvia},
@ -275,6 +295,17 @@ number={2},
pages={287--290}
}

@article{hyndman2006,
title={Another Look at Measures of Forecast Accuracy},
author={Hyndman, Rob and Koehler, Anne},
year={2006},
journal={International Journal of Forecasting},
volume={22},
number={4},
pages={679--688},
publisher={Elsevier}
}

@article{hyndman2008a,
title={Automatic Time Series Forecasting: The forecast package for R},
author={Hyndman, Rob and Khandakar, Yeasmin},
@ -298,6 +329,18 @@ year={2018},
publisher={OTexts}
}

@article{kim2016,
title={A New Metric of Absolute Percentage Error for Intermittent Demand
Forecasts},
author={Kim, Sungil and Kim, Heeyoung},
year={2016},
journal={International Journal of Forecasting},
volume={32},
number={3},
pages={669--679},
publisher={Elsevier}
}

@article{kwiatkowski1992,
title={Testing the null hypothesis of stationarity against the alternative of a
unit root: How sure are we that economic time series have a unit root?},
@ -319,6 +362,17 @@ howpublished = {\url{https://eng.uber.com/neural-networks/}},
note = {Accessed: 2020-10-01}
}

@article{ma2018,
title={Using the Gradient Boosting Decision Tree to Improve the Delineation of
Hourly Rain Areas during the Summer from Advanced Himawari Imager Data},
author={Ma, Liang and Zhang, Guoping and Lu, Er},
year={2018},
journal={Journal of Hydrometeorology},
volume={19},
number={5},
pages={761--776}
}

@article{masmoudi2018,
title={The dial-a-ride problem with electric vehicles and battery
swapping stations},
@ -330,6 +384,15 @@ volume={118},
pages={392--420}
}

@inproceedings{mason2000,
title={Boosting algorithms as gradient descent},
author={Mason, Llew and Baxter, Jonathan and Bartlett, Peter L
and Frean, Marcus R},
year={2000},
booktitle={Advances in neural information processing systems},
pages={512--518}
}

@inproceedings{mueller1997,
title={Predicting Time Series with Support Vector Machines},
author={M{\"u}ller, Klaus-Robert and Smola, Alexander and R{\"a}tsch, Gunnar
@ -367,6 +430,18 @@ number={5},
pages={311--315}
}

@article{prestwich2014,
title={Mean-based Error Measures for Intermittent Demand Forecasting},
author={Prestwich, Steven and Rossi, Roberto and Tarim, Armagan
and Hnich, Brahim},
year={2014},
journal={International Journal of Production Research},
volume={52},
number={22},
pages={6782--6791},
publisher={Taylor \& Francis}
}

@incollection{scholkopf1998,
title={Fast Approximation of Support Vector Kernel Expansions, and an
Interpretation of Clustering as Approximation in Feature Spaces},
@ -378,6 +453,13 @@ publisher={Springer},
pages={125--132}
}

@book{singleton2017,
title={Urban Analytics},
author={Singleton, Alex David and Spielman, Seth and Folch, David},
year={2017},
publisher={Sage}
}

@article{smola2004,
title={A Tutorial on Support Vector Regression},
author={Smola, Alex and Sch{\"o}lkopf, Bernhard},
@ -398,6 +480,17 @@ pages={285--292},
publisher={MIT, Cambridge, MA, USA}
}

@article{syntetos2005,
title={The Accuracy of Intermittent Demand Estimates},
author={Syntetos, Aris and Boylan, John},
year={2005},
journal={International Journal of Forecasting},
volume={21},
number={2},
pages={303--314},
publisher={Elsevier}
}

@article{taylor2003,
title={Exponential Smoothing with a Damped Multiplicative Trend},
author={Taylor, James},
@ -442,6 +535,18 @@ volume={118},
pages={496--512}
}

@article{winkenbach2015,
title={Enabling urban logistics services at La Poste through
multi-echelon location-routing},
author={Winkenbach, Matthias and Kleindorfer, Paul R and Spinler, Stefan},
year={2015},
journal={Transportation Science},
volume={50},
number={2},
pages={520--540},
publisher={INFORMS}
}

@article{winters1960,
title={Forecasting Sales by Exponentially Weighted Moving Averages},
author={Winters, Peter},