%~Mouliné par MaN_auto v.0.40.4 (bbb487d5) 2026-04-29 18:21:38
\documentclass[CRMATH,Unicode,biblatex,published]{cedram}

\TopicFR{\'Equations aux dérivées partielles}
\TopicEN{Partial differential equations}

\addbibresource{CRMATH_Letrouit_20250847.bib}

\usepackage{mathtools}

\usetikzlibrary{patterns, patterns.meta}
\usetikzlibrary{calc,arrows.meta,positioning}
\usetikzlibrary{decorations.pathreplacing}

\newcommand{\R}{\mathbb{R}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\X}{\mathcal{X}}
\newcommand{\Y}{\mathcal{Y}}
\newcommand{\ppow}{\beta}
\newcommand{\B}{B}

\DeclareMathOperator{\dist}{dist}
\DeclareMathOperator{\Var}{Var}

\newcommand{\dd}{\mathop{}\!{\operatorfont{d}}}

\hyphenation{mar-gi-nals}

%%%---------------------------------------------------------------------------

%%% ARROWS

%%% \to
\renewcommand*{\to}{\mathchoice{\longrightarrow}{\rightarrow}{\rightarrow}{\rightarrow}}

%%% \mapsto
\let\oldmapsto\mapsto
\renewcommand*{\mapsto}{\mathchoice{\longmapsto}{\oldmapsto}{\oldmapsto}{\oldmapsto}}

%%%---------------------------------------------------------------------------

%%% DELIMITERS

\DeclarePairedDelimiter{\parens}{\lparen}{\rparen}
\DeclarePairedDelimiter{\braces}{\{}{\}}
\DeclarePairedDelimiter{\abs}{\lvert}{\rvert}
\DeclarePairedDelimiter{\norm}{\lVert}{\rVert}
\DeclarePairedDelimiter{\floor}{\lfloor}{\rfloor}
\DeclarePairedDelimiterX\braket[2]{\langle}{\rangle}{#1\,\delimsize\vert\,\mathopen{}#2}

%%%---------------------------------------------------------------------------

%%% VERTICAL BAR

%%% ''Such that'' macro
\newcommand*{\st}[1][]{\;#1\vert\;}

%%%---------------------------------------------------------------------------

\graphicspath{{./figures/}}

\newcommand*{\mk}{\mkern -1mu}
\newcommand*{\Mk}{\mkern -2mu}
\newcommand*{\mK}{\mkern 1mu}
\newcommand*{\MK}{\mkern 2mu}

\hypersetup{urlcolor=purple, linkcolor=blue, citecolor=red}

\newcommand*{\relabel}{\renewcommand{\labelenumi}{(\theenumi)}}
\newcommand*{\romanenumi}{\renewcommand*{\theenumi}{\roman{enumi}}\relabel}
\newcommand*{\Romanenumi}{\renewcommand*{\theenumi}{\Roman{enumi}}\relabel}
\newcommand*{\alphenumi}{\renewcommand*{\theenumi}{\alph{enumi}}\relabel}
\newcommand*{\Alphenumi}{\renewcommand*{\theenumi}{\Alph{enumi}}\relabel}
\let\oldtilde\tilde
\renewcommand*{\tilde}[1]{\mathchoice{\widetilde{#1}}{\widetilde{#1}}{\oldtilde{#1}}{\oldtilde{#1}}}
\let\oldhat\hat
\renewcommand*{\hat}[1]{\mathchoice{\widehat{#1}}{\widehat{#1}}{\oldhat{#1}}{\oldhat{#1}}}
\let\oldforall\forall
\renewcommand*{\forall}{\mathrel{\oldforall}}

\title{Unstable optimal transport maps}
\alttitle{Instabilité d'applications de transport optimal}

\author{\firstname{Cyril} \lastname{Letrouit}\CDRorcid{0000-0001-5469-9249}}
\address{Université Paris-Saclay, CNRS, Laboratoire de mathématiques d’Orsay, 91405 Orsay, France} \email{cyril.letrouit@universite-paris-saclay.fr}

\thanks{The author acknowledges the support of the Agence nationale de la recherche, through the PEPR PDE-AI project (ANR-23-PEIA-0004)}
\CDRGrant[ANR]{ANR-23-PEIA-0004}

\altkeywords{\kwd{Transport optimal} \kwd{stabilité} \kwd{unicité} \kwd{distance de Wasserstein}}
\keywords{\kwd{Optimal transport} \kwd{stability} \kwd{uniqueness} \kwd{Wasserstein distance}}

\subjclass{49Q22}

\begin{abstract}
The stability of optimal transport maps with respect to perturbations of the marginals is a question of interest for several reasons, ranging from numerical analysis and statistics to the justification of the linearized optimal transport framework. Under various assumptions on the source measure, it is known that optimal transport maps are stable with respect to variations of the target measure.

In this note, we focus on the mechanisms that can, on the contrary, lead to \emph{instability}. We identify two of them. We first show that instability may arise from the unboundedness of the density: we exhibit a source density on the unit ball of $\mathbb{R}^d$ which blows up at two points of the boundary and for which optimal transport maps are highly unstable. Then we prove that even for uniform densities on bounded open sets, optimal transport maps can be rather unstable sufficiently close to configurations where uniqueness of optimal plans is lost.
\end{abstract}

\begin{altabstract}
La stabilité des applications de transport optimal par rapport aux perturbations des marginales est une question importante à la fois en analyse numérique et en statistiques. Son étude est aussi directement liée à la possibilité de linéariser le transport optimal. Sous diverses hypothèses sur la mesure source, il est connu que les applications de transport optimal sont stables vis-à-vis des variations de la mesure cible.

Dans cette note, nous nous concentrons au contraire sur les mécanismes susceptibles de conduire à de l’\emph{instabilité}. Nous en identifions deux. Nous montrons tout d’abord que l’instabilité peut provenir du caractère non borné de la densité : nous exhibons une densité source sur la boule unité de $\mathbb{R}^d$ qui diverge en deux points du bord et pour laquelle les applications de transport optimal sont fortement instables. Nous démontrons ensuite que, même pour des densités uniformes sur des ouverts bornés, les applications de transport optimal peuvent être assez instables à proximité de configurations où l’unicité des plans optimaux est perdue.
\end{altabstract}

\COI{The author does not work for, advise, own shares in, or receive funds from any organization that could benefit from this article, and has declared no affiliations other than their research organizations.}

\dateposted{2026-05-18}
\begin{document}
%\input{CR-pagedemetas}
%\end{document}
\maketitle

\section{Introduction}

\subsection{Main results}

Brenier's theorem is one of the foundational results of optimal transport theory~\cite{brenier,brenierCPAM}. It provides simple conditions guaranteeing the existence of an optimal map transporting a given ``source'' probability measure $\rho$ on $\R^d$ to another ``target'' probability measure $\mu$ on $\R^d$, when the cost of transporting mass is the squared Euclidean distance. If $\#$ denotes the pushforward operation on measures, it asserts the $\rho$-a.e.\ existence and uniqueness of a solution to the optimization problem (in $T$)
\[
\inf  \braces[\Bigg]{\int_{\R^d} \abs[\big]{x-T(x)}^2 \dd\rho(x) \st[\Bigg] T_\#\rho=\mu}
\]
as soon as $\rho,\mu$ belong to the set $\mathcal{P}_2(\R^d)$ of probability measures on $\R^d$ with finite second moment, and $\rho$ is absolutely continuous with respect to the Lebesgue measure. This solution is denoted by $T_\mu\in L^2(\rho)$ in the sequel; the dependence on $\rho$ is omitted in the notation since $\rho$ will be fixed throughout.

After existence and uniqueness, the final ingredient for a well-posed problem is \emph{stability}. Brenier~\cite{brenierCPAM} proved that his construction is stable with respect to perturbations of the target measure, for a fixed source measure $\rho$: he showed that the map $\mu \mapsto T_\mu$ is continuous from $\parens[\big]{\mathcal{P}_2(\R^d),W_2}$ to $L^2(\rho)$, where $W_p$ denotes the $p$-Wasserstein distance.

In recent years, several people have tried to quantify this stability, i.e., to provide conditions on~$\rho$ that guarantee quantitative bounds on the $L^2$-distance between $T_\mu$ and $T_\nu$ in terms of Wasserstein distances between $\mu$ and $\nu$. These quantitative stability bounds have been established under various assumptions on $\rho$ (see the literature review in Section~\ref{s:litreview}). They are of the form
\begin{equation}\label{e:stabmapoule}
\forall \mu,\nu\in\mathcal{P}_2(\Y),
\quad \norm{T_\mu-T_\nu}_{L^2(\rho)} \leq CW_p(\mu,\nu)^{\alpha}
\end{equation}
for $C,\alpha>0$, $p\geq 1$ and $\Y\subset\R^d$. There are multiple motivations for looking at such estimates. For instance, they provide quantitative rates of convergence of numerical approximations of the optimal transport map; they serve as a foundational ground for the statistical estimation of optimal transport maps (see \cite[Chapter~3.2]{chewi} and~\cite{balakrishnan}); and finally they justify theoretically the linearized optimal transport framework (see Section~\ref{s:litreview}).

However, it has never been proven that optimal transport maps could be \emph{unstable}. In this note, we fill this gap by elucidating two mechanisms leading to instability, or partial loss of stability, of optimal transport maps. Our results reveal limitations on the generality under which quantitative stability results may hold.

We first construct an absolutely continuous source measure $\rho$ for which optimal transport maps are \emph{highly unstable}. More precisely, in Theorem~\ref{t:highlyunstable}, we construct $\rho$ whose density is bounded below on the (closed) unit ball $B_{\R^d}(0,1)$ such that for any ball $\Y=B_{\R^d}(0,R)$, any $C,\alpha>0$ and $p\geq 1$, the bound~\eqref{e:stabmapoule} fails. In what follows, $\mathcal{P}(\X)$ denotes the set of probability measures on $\X\subset\R^d$.

\begin{theo}\label{t:highlyunstable}
Let $d\geq 2$. There exists an absolutely continuous $\rho\in\mathcal{P}\parens[\big]{B_{\R^d}(0,1)}$ with density bounded below, such that for any ball $\Y=B_{\R^d}(0,R)$ with $R>0$, any $C,\alpha>0$ and $p\geq 1$, the following inequality fails:
\begin{equation}\label{e:stabmap}
\forall \mu,\nu\in\mathcal{P}(\Y),
\quad \norm{T_\mu-T_\nu}_{L^2(\rho)} \leq CW_p(\mu,\nu)^{\alpha}.
\end{equation}
\end{theo}

Behind this result, the mechanism leading to instability is the blow-up of the density $\rho$. The density $\rho$ constructed to prove Theorem~\ref{t:highlyunstable} actually blows up at two points $A$ and $A'$ of the boundary of $B_{\R^d}(0,1)$, at a carefully chosen rate. It may be expected that such a source measure leads to some form of instability, since the extreme case of $\rho$ being of the form \mbox{$\frac12(\delta_A+\delta_{A'})$} generates extreme instabilities ---~but it is not absolutely continuous and lacks existence of an optimal transport map for generic target measures, whereas in Theorem~\ref{t:highlyunstable} we look for an absolutely continuous $\rho$. The results of~\cite{letrouitmerigot} show that the blow-up rate that we choose is optimal, in the sense that if $\rho$ blows up slightly slower at the boundary of $B_{\R^d}(0,1)$, then~\eqref{e:stabmap} holds for some $C,\alpha>0$ and $p=1$ (see Remark~\ref{r:superpolyn}).

Apart from unbounded densities, if we focus on source densities bounded above and below on bounded open sets, some loss of stability can nevertheless occur close enough to configurations where optimal plans are non-unique. It is actually natural to expect loss of stability just before loss of uniqueness. To illustrate this idea, we show the following result ---~whose relevance and numerology is explained in Section~\ref{s:regularity}~--- using as a source measure the \emph{uniform probability measure} $\rho$ on a carefully chosen \emph{bounded open set} $\X\subset\R^d$.

\begin{theo}\label{t:unstableW1}
Let $d\geq 2$. There exists a bounded open set $\X\subset\R^d$ and a compact set $\Y$ such that if $\rho$ denotes the uniform probability measure on $\X$, then
\begin{enumerate}
\item \label{t:unstableW1_1} for any $C>0$, $p\geq 1$ and any $\alpha>\frac{p}{2(p+1)}$ (in particular for $\alpha=\frac12$), the following inequality fails:
\begin{equation}\label{e:stabmap2}
\forall \mu,\nu\in\mathcal{P}(\Y),
\quad \norm{T_\mu-T_\nu}_{L^2(\rho)} \leq CW_p(\mu,\nu)^{\alpha};
\end{equation}
\item \label{t:unstableW1_2} for any $p\geq 1$ and $\alpha<1/6$ the inequality~\eqref{e:stabmap2} holds for some $C>0$.
\end{enumerate}
\end{theo}

The bounded set $\X$ used to prove Theorem~\ref{t:unstableW1} is described in Section~\ref{s:ideacstr}. It repeats at smaller and smaller scales a same structure, getting closer at smaller scales to a configuration where uniqueness of optimal transport maps is lost. Nevertheless $\X$ is open, so $\rho$ remains in a setting where Brenier's theorem applies, and uniqueness is guaranteed. Let us also mention that our results hold not only for the quadratic cost, but also for arbitrary costs of the form $h\parens[\big]{\abs{x-y}}$ with $h \colon \R_+ \to \R$ increasing and strictly convex, in particular for $h(r)=r^q$, $q>1$ (see Remark~\ref{r:extgencost}).

\subsection{Regularity of the map \texorpdfstring{$\mu \mapsto T_\mu$}{mu mapsto T\_ mu}} \label{s:regularity}

Our results shed light on the regularity of the map $\mu \mapsto T_\mu$ from $\parens[\big]{\mathcal{P}(\Y),W_2}$ to $L^2(\rho)$, which reflects the stability properties of the optimal transport maps with source measure $\rho$ with respect to perturbations of the marginals. Gigli~\cite[Theorem~5.1]{gigli} proved in 2011 that in some situations, this map is not better than $\frac12$-Hölder. Since then, it remains an open question to determine under which conditions this regularity is actually achieved: for instance, Gigli~\cite[Corollary~3.4]{gigli} (see also~\cite[Theorem~2.3]{merigot}) showed that $\frac12$-Hölder regularity is achieved at any $\mu$ such that $T_\mu$ is Lipschitz. This result plays a key role in statistical optimal transport (see \cite[Chapter~3.2]{chewi} and~\cite{balakrishnan}). It is believed that $\frac12$-Hölder regularity actually holds in greater generality, and in particular the following conjecture has emerged over the past few years.

\begin{conj}\label{conj1/2}
Let $\rho$ be uniform on a compact convex set of\/ $\R^d$, and let $\Y$ be compact. Then there exists $C>0$ such that
\[
\forall \mu,\nu\in\mathcal{P}(\Y),
\quad \norm{T_\mu-T_\nu}_{L^2(\rho)} \leq CW_2(\mu,\nu)^{1/2}.
\]
\end{conj}

This inequality is known with an exponent $1/6$ in the right-hand side (see~\cite{delmer} and Theorem~\ref{t:avecquentin} below), and the difficulty is therefore to prove it with the sharp exponent $1/2$. There is some hope that the above conjecture holds under weaker assumptions on the support of $\rho$, still assuming $\rho$ bounded above and below on its support.

Before this note, no result had shown that the map $\mu \mapsto T_\mu$ (from $\parens[\big]{\mathcal{P}(\Y),W_2}$ to $L^2(\rho)$) could fail to be $\frac12$-Hölder, even for other choices of an absolutely continuous probability measure $\rho$. In the proof of Theorem~\ref{t:unstableW1} (with $p=2$), we show that for $\rho$ uniform on a well-chosen bounded open set, this map is in fact not better than $\frac13$-Hölder at some (explicit) $\mu$. And Theorem~\ref{t:highlyunstable} shows that if $\rho$ is allowed to have unbounded density, then it can happen that for any $\alpha>0$ the map is not $\alpha$-Hölder.

\subsection{Previous works}\label{s:litreview}

The literature has been focused so far on quantitative \emph{stability} inequalities, of the form~\eqref{e:stabmap}. They have been established under various assumptions on $\rho$. After works by Gigli~\cite{gigli}, Berman~\cite{berman}, Mérigot, Delalande \& Chazal~\cite{merigot}, and Delalande \& Mérigot~\cite{delmer}, the following result has been achieved in~\cite{letrouitmerigot}.

\begin{theo}[{\cite[Theorem~1.7]{letrouitmerigot}}]\label{t:avecquentin}
Let $\X\subset\R^d$ be a John domain with rectifiable boundary, and let~$\rho$ be a probability density on $\X$, bounded from above and below by positive constants. Then, for any compact set $\mathcal{Y}$, there exists $C_{\rho,\mathcal{Y}}>0$ such that for any probability measures $\mu,\nu$ supported in~$\mathcal{Y}$,
\begin{equation}\label{e:stabmapjohn2}
\norm{T_\mu-T_\nu}_{L^2(\rho)}\leq C_{\rho,\mathcal{Y}}W_1(\mu,\nu)^{1/6}.
\end{equation}
\end{theo}

Recall that any bounded and connected Lipschitz domain is a John domain, and therefore Theorem~\ref{t:avecquentin} applies for instance in this case. The paper~\cite{letrouitmerigot} also establishes similar stability inequalities for log-concave $\rho$, and for $\rho$ blowing-up (or decaying) at some controlled rate at the boundary of a smooth compact set. Quantitative stability inequalities have also been proved for optimal transport maps with respect to $p$-costs in $\R^d$~\cite{mischler}, and the squared distance cost on Riemannian manifolds~\cite{kitagawa}.

Quantitative stability results were primarily motivated by numerical analysis questions: if $\mu$ is known only through an approximation $\nu=\widehat{\mu}$ (for instance through samples), is it true that the optimal transport map $T_{\widehat{\mu}}$, which one may compute for instance through semi-discrete optimal transport, is not far from $T_\mu$?

Quantitative stability inequalities, when they hold, also serve as a justification for the linearized optimal transport framework introduced in~\cite{slepcev} (and used later in several applications): the mapping $\mu \mapsto T_{\mu}$ provides an embedding of $\parens[\big]{\mathcal{P}(\Y),W_2}$ into the Hilbert space $L^2(\rho,\R^d)$, and this embedding allows one to apply the standard ``Hilbertian'' statistical toolbox to measure-valued data. This embedding is distance-increasing, meaning that $\norm{T_\mu - T_\nu}_{L^2(\rho)}\geq W_2(\mu,\nu)$, and stability estimates such as~\eqref{e:stabmapjohn2} (combined with the fact that $W_1\leq W_2$) show that it is bi-Hölder continuous when $\rho$ satisfies the assumptions of Theorem~\ref{t:avecquentin}. In other words, the distance $d(\mu,\nu) = \norm{T_\mu - T_\nu}_{L^2(\rho)}$ preserves in a rough way the geometry associated to the Wasserstein distance. However, recall that some results show the impossibility of embedding (in a very coarse sense ---~in particular in a bi-Hölder way) Wasserstein spaces over $\R^d$ ($d\geq 3$) into Banach spaces of non-trivial type such as Hilbert spaces (see e.g.~\cite{andoni}); this is why the literature about quantitative stability inequality has mostly focused on the case where targets are taken over a \emph{compact set} $\Y$.

On the side of instability, the only known results hold merely for \emph{Kantorovich potentials}. Recall that given $\rho,\mu\in\mathcal{P}_2(\R^d)$ with $\rho$ absolutely continuous, a Kantorovich potential is a convex function whose gradient is equal to the Brenier map $T_\mu$. If the support of $\rho$ is connected, then there exists for each $\mu\in\mathcal{P}_2(\R^d)$ a unique Kantorovich potential $\phi_\mu$ satisfying $\int \phi_\mu \dd\rho=0$. The paper~\cite{letrouitmerigot} provides examples of uniform source measures $\rho$ on non-John domains $\X$ (but still bounded and connected) for which Kantorovich potentials are highly unstable in the same sense as above: the inequality
\[
\forall \mu,\nu\in\mathcal{P}(\Y),
\quad \norm{\phi_\mu-\phi_\nu}_{L^2(\rho)} \leq CW_p(\mu,\nu)^{\alpha}
\]
fails for any $C,\alpha>0$ and $p\geq 1$. However it does not provide examples of unstable transport maps.

\section{Proof of Theorem~\ref{t:highlyunstable}}

We construct $\rho$ an absolutely continuous probability measure over the (closed) unit ball $B_{\R^d}(0,1)$. Its density is defined as a function of the distance to two points $A$ and $A'$, and blows up at $A$ and $A'$ while being uniformly bounded elsewhere. In the sequel, absolutely continuous measures with respect to the Lebesgue measure are identified with their density.

Let $A=(1,0,\dotsc,0)\in\R^d$, $A'=(-1,0,\dotsc,0)\in\R^d$, and $\mathcal{E}=\{A,A'\}$, and let
\begin{equation}\label{e:defoff}
f \colon r \mapsto r^{-d}\min\parens[\big]{1,(\log r)^{-2}}
\end{equation}
for $r>0$. Denote by $\dist(x,\mathcal{E})$ the Euclidean distance from $x$ to the set $\mathcal{E}$. We choose $c_0>0$ in a way that the density
\[
\rho(x)=c_0f\parens[\big]{\dist(x,\mathcal{E})}
\]
on $B_{\R^d}(0,1)$ is a probability density.

To see that this is possible, let us verify that the function $g \colon x \mapsto f\parens[\big]{\dist(x,\mathcal{E})}$ is integrable on $B_{\R^d}(0,1)$. We only need to check integrability close to $\mathcal{E}$. Taking polar coordinates around $A$, $\varepsilon>0$ small, and denoting by $\sigma_{d-1}$ the area of the $(d-1)$-dimensional unit sphere, we have
\[
\int_{B_{\R^d}(A,\varepsilon)\cap B_{\R^d}(0,1)} g(x) \dd x\leq \sigma_{d-1} \int_0^\varepsilon r^{-d}(\log r)^{-2} r^{d-1} \dd r = \sigma_{d-1}\int_0^\varepsilon \frac{1}{r (\log r)^2} \dd r <+\infty.
\]
The same computation holds replacing $A$ by $A'$. This shows the existence of $c_0>0$ as above.

We assume $R=1$ in the sequel, i.e., target measures are supported in $B_{\R^d}(0,1)$. Theorem~\ref{t:highlyunstable} follows for arbitrary $R$ by a scaling argument. Let us consider $B_\theta = \parens[\big]{\sin(\theta),\cos(\theta),0,\dotsc,0}$ and $B_\theta' = \parens[\big]{-\sin(\theta),-\cos(\theta),0,\dotsc,0}$ for $\theta\in\R$. We set
\[
\mu_\theta=\frac12(\delta_{B_\theta}+\delta_{B_\theta'}).
\]
We study the inequality~\eqref{e:stabmap} close to $\theta=0$. Since $\rho$ is invariant under the transformation $x \mapsto -x$, the optimal transport map from $\rho$ to $\mu_\theta$ is the map
\[
T_{\mu_\theta} \colon x \mapsto
\begin{cases}
	B_\theta		& \text{if $\langle x,B_\theta\rangle > 0$},
\\	B_\theta'	& \text{if $\langle x,B_\theta\rangle < 0$}.
\end{cases}
\]
In other words $T_{\mu_\theta}$ sends each point of the source to the closest point in the support of the target.

For small $\theta$, most points that $T_{\mu_0}$ sends to $B_0$ are sent to $B_\theta$ under $T_{\mu_\theta}$, but some of them, those satisfying $\langle x,B_0\rangle > 0 > \langle x,B_\theta\rangle$, are sent to $B_\theta'$. We will bound from below the measure of the latter points in order to bound from below $\norm{T_{\mu_\theta}-T_{\mu_0}}_{L^2(\rho)}$. Let us show that for any $\theta\geq 0$ small enough (so that $\sin \theta \geq \theta/2$),
\begin{equation}\label{e:B0Btheta}
B(A',\theta/4) \cap \braces[\big]{x\in\R^d \st[\big] x_2>0}\subset \braces[\big]{x\in\R^d \st[\big] \langle x,B_0\rangle>0>\langle x,B_\theta\rangle}.
\end{equation}
Let $x=(x_1,\dotsc,x_d)\in\R^d$ be an element of the left-hand side. Then $\langle x,B_0\rangle>0$ since $x_2>0$. Moreover,
\[
\langle x,B_\theta\rangle = x_1\sin \theta + x_2\cos\theta \leq x_1 \sin \theta + x_2 \leq \parens[\Bigg]{-1+\frac{\theta}{4}}\frac{\theta}{2}+\frac{\theta}{4}<0
\]
which concludes the proof of~\eqref{e:B0Btheta}. Let us now observe that there exists $c_1>0$ (independent of $\theta$) such that the left-hand side of~\eqref{e:B0Btheta} has measure at least $c_1 \rho\parens[\big]{B(A',\theta/4)}$. Indeed, for $\theta$ small enough the support of $\rho$ contains the spherical sector
\begin{equation}\label{e:cone}
B(A',\theta/4) \cap \mathcal{C}
\end{equation}
(see Figure~\ref{fig:blowup}) where $\mathcal{C}$ is the cone
\[
\mathcal{C}= \braces[\Bigg]{x\in\R^d \st[\Bigg] x_2>0, \ \frac{x_1+1}{\abs{x-A'}}>\frac12}.
\]
Denote by $c_1>0$ the angular aperture of this cone relatively to the full solid angle of the $(d-1)$-dimensional unit sphere, i.e., the area of the intersection of $\mathcal{C}$ with the unit sphere $\partial B(A',1)$, divided by the area of the full unit sphere $\partial B(A',1)$. Since $\rho$ has a radial density close to the center~$A'$, the $\rho$-measure of $B(A',\theta/4) \cap \mathcal{C}$ is at least equal to $c_1\rho\parens[\big]{B(A',\theta/4)}$.

\begin{figure}[h!]
\begin{tikzpicture}[scale=2]

\draw[->] (-2,0) -- (2,0) node[right] {$x_1$};
\draw[->] (0,-1.5) -- (0,1.5) node[above] {$x_2$};

\draw[thick,blue] (0,0) circle (1);

\filldraw[orange] (-1,0) circle (0.03) node[below left] {$A'$};

\draw[thick,orange] (-1,0) -- (0.5,0.866);
\draw[thick,orange] (-1,0) -- (1,0);

\draw[thick,purple] (-1,0) circle (0.4);

\fill[pattern=north east lines, pattern color=gray] (-1,0) -- ++(0:0.4) arc[start angle=0, end angle=30, radius=0.4] -- cycle;
\end{tikzpicture}
\caption{Illustration of~\eqref{e:cone}: in purple the ball $B(A',\theta/4)$, in orange the cone in~\eqref{e:cone}, in grey dashed lines the set~\eqref{e:cone}, and in blue the boundary of the support of $\rho$.} \label{fig:blowup}
\end{figure}

We deduce that for $\theta$ small enough, since $\abs{B'_\theta-B_0}\geq 1$,
\[
\begin{split}
\norm{T_{\mu_\theta}-T_{\mu_0}}_{L^2(\rho)}^2
	& \geq \abs{B'_\theta-B_0}^2 \rho\parens[\Big]{\braces[\big]{x \st[\big] \langle x,B_0\rangle>0>\langle x,B_\theta\rangle}}
\\	& \geq c_1\rho\parens[\big]{B(A',\theta/4)}
\\	& \geq c_0c_1\sigma_{d-1}\int_0^{\theta/4} \frac{1}{r(\log r)^2} \dd r
\\	& = c_0c_1\sigma_{d-1}\frac{1}{\abs[\big]{\log(\theta/4)}}
\end{split}
\]
which decays to $0$ as $\theta \to 0$ slower than any $\theta^\alpha$ with $\alpha>0$. Moreover, for $\theta$ small enough, $W_p(\mu_0,\mu_\theta)=\abs{B_\theta-B_0}=2\sin(\theta/2)\sim \theta$. Put together, these estimates conclude the proof of Theorem~\ref{t:highlyunstable}.

\begin{rema}\label{r:superpolyn}
The density $\rho$ used in the proof of Theorem~\ref{t:highlyunstable} blows up at rate $r^{-d}(\log r)^{-2}$. Let us instead consider $\rho(x)=c_\delta h\parens[\big]{\dist(x,\mathcal{E})}$ where
\[
h \colon r \mapsto r^{-d+\delta}
\]
for some $\delta>0$, i.e., a density which blows up slightly slower at $A$ and $A'$. The assumption $\delta>0$ is necessary to ensure that $\rho$ is integrable, and $c_\delta$ is chosen to make $\rho$ a probability measure. Following the proof strategy of~\cite{letrouitmerigot}, notably the proof of Theorem~1.10 in loc.~cit., one can show that for any compact $\Y\subset\R^d$, there exists $C>0$ such that
\[
\norm{T_\mu-T_\nu}_{L^2(\rho)}\leq CW_1(\mu,\nu)^{\frac{\delta}{6d}}.
\]
Conversely, the same argument as in the proof of Theorem~\ref{t:highlyunstable} (with $h$ replacing $f$) shows that if
\begin{equation}\label{e:polyn}
\norm{T_\mu-T_\nu}_{L^2(\rho)}\leq CW_p(\mu,\nu)^{\alpha}
\end{equation}
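holds for some $C,\alpha>0$ and $p\geq 1$, then necessarily $\alpha\leq \delta/2$ (indeed, the argument yields $\norm{T_{\mu_\theta}-T_{\mu_0}}_{L^2(\rho)}^2\gtrsim \theta^{\delta}$ while $W_p(\mu_0,\mu_\theta)\sim\theta$).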

It is therefore no surprise that for densities which blow up faster than $r^{-d+\delta}$ for any $\delta>0$, any quantitative stability inequality of the form~\eqref{e:polyn} breaks down. For these densities, the techniques of~\cite{letrouitmerigot} do not apply anymore, and we are not aware of any quantitative stability inequality, even with a larger right-hand side than $W_p(\mu,\nu)^\alpha$ (e.g., $\parens[\big]{1+\abs[\big]{\log W_p(\mu,\nu)}}^{-1}$).

Actually, weaker quantitative estimates (than~\eqref{e:polyn}) should then take into account quite finely the blowup rate of the density. For instance, switching $f$ defined in~\eqref{e:defoff} to a Bertrand integrand
\[
r \mapsto r^{-d} \min\parens[\Bigg]{1,\frac{1}{\abs{\log r}\dotsm \abs{\log}^{\circ k}(r)\parens[\big]{\abs{\log}^{\circ (k+1)}(r)}^2}},
\]
where $g^{\circ k}$ denotes the $k$-fold composition of a function~$g$ with itself, will give arbitrarily slow convergence (in log scale) to $0$.
\end{rema}

\begin{rema}
For the choice of $\rho$ used in the proof of Theorem~\ref{t:highlyunstable}, the strong instability of optimal transport maps naturally raises the following question: how can one statistically estimate optimal transport maps with this source measure?
\end{rema}

\section{Proof of Theorem~\ref{t:unstableW1}}

\subsection{Idea of the construction} \label{s:ideacstr}

To prove Theorem~\ref{t:unstableW1}, we build upon the idea that stability of optimal transport maps can be lost just before uniqueness is lost. To explain our construction of $\rho$, let us start with a simple and well-known example where optimal transport plans are not unique: consider the vertices $A,B,A',B'$ of a square, for instance $A=(1,0)$, $B=(0,1)$, $A'=(-1,0)$ and $B'=(0,-1)$, and consider the optimal transport problem from $\frac12 (\delta_A+\delta_{A'})$ to $\frac12(\delta_B+\delta_{B'})$.

\begin{figure}[h!]
\begin{tikzpicture}[>=stealth,thick]

\coordinate (A) at (1,0);
\coordinate (B) at (0,1);
\coordinate (A') at (-1,0);
\coordinate (B') at (0,-1);

\draw[->] (0.9,0.1) to (0.1,0.9);
\draw[->] (0.9,-0.1) to (0.1,-0.9);
\draw[->] (-0.9,0.1) to (-0.1,0.9);
\draw[->] (-0.9,-0.1) to (-0.1,-0.9);

\node[circle,draw,inner sep=1pt,label=right:{$A$}] (A) at (1,0) {};
\node[circle,draw,inner sep=1pt,label=above:{$B$}] (B) at (0,1) {};
\node[circle,draw,inner sep=1pt,label=left:{$A'$}] (A') at (-1,0) {};
\node[circle,draw,inner sep=1pt,label=below:{$B'$}] (B') at (0,-1) {};
\end{tikzpicture}
\caption{Any transport plan between $\frac12 (\delta_A+\delta_{A'})$ and $\frac12(\delta_B+\delta_{B'})$ is optimal.} \label{fig:carre}
\end{figure}

It is not difficult to see that all transport plans have the same cost, in particular there exist infinitely many optimal transport plans. Moreover, if we keep $A$ and $A'$ unchanged, but we move a little bit $B$ and $B'$, then we can recover unique optimal transport plans/maps. For instance, if we slightly move $B$ horizontally to the right, and $B'$ symmetrically slightly to the left, then we recover a unique optimal transport map, where the mass at $A$ is sent to $B$, and the mass at $A'$ is sent to $B'$. Symmetrically, if we move $B$ to the left and $B'$ to the right, then we also recover a unique optimal transport map, but the mass at $A$ is sent to $B'$ and the mass at $A'$ is sent to $B$. The configuration displayed in Figure~\ref{fig:carre} therefore generates a strong instability of optimal transport maps (and plans). However, this example is quite specific in the sense that it deals with discrete measures, and Brenier's theorem does not apply.

Nonetheless, we can take inspiration from this example to construct absolutely continuous (and compactly supported) source measures $\rho$ for which optimal transport maps are highly unstable. First, we slightly smooth the source measure by replacing it by $\rho_r=\frac{1}{2\pi r^2}(\mathbf{1}_{B(A,r)}+\mathbf{1}_{B(A',r)})$ for some small $r>0$. We denote by $T^{(r)}_\mu$ the optimal transport map from $\rho_r$ to a mea\-sure~$\mu$. We know by the results of~\cite{letrouitmerigot} that stability holds for any $r>0$: for any compact set $\Y\subset\R^d$,
\[
\forall \mu,\nu\in\mathcal{P}(\Y),
\quad \norm{T^{(r)}_\mu - T^{(r)}_\nu}_{L^2(\rho_r)} \leq C_r W_1(\mu,\nu)^{1/6}.
\]
It can be checked that the constant $C_r$ tends to $+\infty$ as $r \to 0$, which is a sign of instability (in the limit $r \to 0$). To prove Theorem~\ref{t:unstableW1} we repeat the above construction ``at all scales''. In other words, we take $\rho$ uniform over an infinite union of well-chosen pairs of balls $B(A_i,r_i)$, $B(A_i',r_i)$ of various sizes. Actually, we do not take balls but rectangular parallelepipeds because it is helpful at some point to have more parameters than just the radius.

\subsection{The construction}

We first construct the support of $\rho$, denoted by $\X$ in what follows (and depicted in Figure~\ref{fig:supp}). In what follows, we consider the rectangular parallelepiped
\[
\mathcal{Q}(\ell, r) =  \braces[\Bigg]{(x_1,\dotsc,x_d)\in\R^d \st[\Bigg] 
\text{$0<x_1<\ell$, $\abs{x_2} < \frac{r}{2}$, and $\abs{x_i} < \frac12$ for $3\leq i\leq d$}}
\]
and consider for $A\in\R^d$ the translates
\[
\mathcal{T}^+(A,\ell, r)=A+\mathcal{Q}(\ell,r),
\qquad \mathcal{T}^-(A,\ell,r)=A-\mathcal{Q}(\ell,r)
\]
(in the sense of the Minkowski sum and difference).

In the sequel, $\N=\{1,2,\dotsc\}$ denotes the set of positive integers. Let $(\ell_i),(r_i),(w_i)\in(\R_+)^\N$ and $(u_i)\in\R^\N$. We will make several assumptions on these sequences in Sections~\ref{s:prelim} and~\ref{s:unstabW1}. Let
\begin{equation}\label{e:SSi}
\mathcal{S}_i=\mathcal{T}^+(A_i^+,\ell_i,r_i)\cup \mathcal{T}^-(A_i^-,\ell_i,r_i)
\end{equation}
where $A_i^+=(u_i+w_i,0,\dotsc,0)$ and $A_i^-=(u_i-w_i,0,\dotsc,0)$, and let
\[
\X= \bigcup_{i=1}^{+\infty} \mathcal{S}_i.
\]
In the sequel, the sequences will always be chosen in a way that when traveling along the $x_1$-axis in the increasing $x_1$ direction, the parallelepipeds defined above do not meet, and are in the order $\mathcal{S}_1,\mathcal{S}_2,\dotsc$ as in Figure~\ref{fig:supp}.

\begin{figure}[h]
\begin{tikzpicture}

\tikzset{axis/.style = {line width=0.8pt}, tick/.style = {line width=1pt}, bigcircle/.style = {line width=0.9pt}, smallsym/.style = {line width=0.7pt}, lab/.style = {font=\small}}

\draw[dotted,->] (-2,-1) -- (-2,1);

\node[left] at (-2,1) {$x_2$};

\draw[dotted,->] (-2,0) -- (11,0);

\node[below] at (11,0) {$x_1$};

\draw (0,0.3)--(0,-0.3)--(-1,-0.3)--(-1,0.3)--cycle;
\draw (2,0.3)--(2,-0.3)--(3,-0.3)--(3,0.3)--cycle;

\draw (5.5,0.2)--(5.5,-0.2)--(4.8,-0.2)--(4.8,0.2)--cycle;
\draw (6.5,0.2)--(6.5,-0.2)--(7.2,-0.2)--(7.2,0.2)--cycle;

\draw (9,0.1)--(9,-0.1)--(8.7,-0.1)--(8.7,0.1)--cycle;
\draw (9.5,0.1)--(9.5,-0.1)--(9.8,-0.1)--(9.8,0.1)--cycle;


\draw[<->] (-1.1,-0.3) -- (-1.1,0.3) node[midway,left] {$r_i$};
\draw[<->] (-1,0.4) -- (0,0.4) node[midway,above] {$\ell_i$};
\draw[<->] (0,-0.8) -- (2,-0.8) node[midway,below] {$2w_i$};

\coordinate (A1) at (0,0);
\draw[smallsym] ($(A1)+(-0.08,0.08)$) -- ($(A1)+(0.08,-0.08)$);
\draw[smallsym] ($(A1)+(-0.08,-0.08)$) -- ($(A1)+(0.08,0.08)$);
\node[lab, right=0.5pt] at ($(A1)$) {$A_i^-$};

\coordinate (A1p) at (2,0);
\draw[smallsym] ($(A1p)+(-0.08,0.08)$) -- ($(A1p)+(0.08,-0.08)$);
\draw[smallsym] ($(A1p)+(-0.08,-0.08)$) -- ($(A1p)+(0.08,0.08)$);
\node[lab, left=0.5pt] at ($(A1p)$) {$A_i^+$};

\draw [decorate,decoration={brace,amplitude=10pt}] (-1,1.2) -- (3,1.2) node[midway,above=12pt] {$\mathcal{S}_i$};
\end{tikzpicture}
\caption{Part of the support of $\rho$, projected on the $(x_1,x_2)$-plane.} \label{fig:supp}
\end{figure}

We consider $\rho$ an absolutely continuous probability measure whose support is $\X$ and which, for any $i$, is uniform on $\mathcal{S}_i$. Finally, we let for any $i$
\[
\sigma_i = \rho\parens[\big]{\mathcal{T}^+(A_i^+,\ell_i,r_i)} = \rho\parens[\big]{\mathcal{T}^-(A_i^-,\ell_i,r_i)}.
\]
With a slight abuse of notation, the density of $\rho$ with respect to the Lebesgue measure is also denoted by $\rho$ in what follows.

Let $B_i^+=(u_i,w_i,0,\dotsc,0)$ and $B_i^-=(u_i,-w_i,0,\dotsc,0)$. Let
\begin{equation}\label{e:defmu}
\mu=\sum_{i=1}^{+\infty} \sigma_i (\delta_{B_i^+}+\delta_{B_i^-}).
\end{equation}
It is immediate to check that this is a probability measure.

For $i\in\N$, let $C_i^+=(u_i+r_i,w_i,0,\dotsc,0)$, $C_i^-=(u_i-r_i,-w_i,0,\dotsc,0)$, and
\[
\nu_i=\mu+\sigma_i(\delta_{C_i^+}+\delta_{C_i^-}-\delta_{B_i^+}-\delta_{B_i^-}).
\]
It is of course a probability measure. The only difference between $\nu_i$ and $\mu$ is that in the sum~\eqref{e:defmu}, the $i$-th term has been replaced by $\sigma_i(\delta_{C_i^+}+\delta_{C_i^-})$, while all other terms are left unchanged. With the choices made in the next section, $C_i^+$ and $C_i^-$ are seen as perturbations of $B_i^+$ and $B_i^-$.

In the sequel we call ``the $i$-th cell'' the set
\[
\pi_{12}\parens[\big]{\mathcal{S}_i\cup \{B_i^+, B_i^-, C_i^+, C_i^-\}}
\]
where $\pi_{12}$ denotes the projection onto the first two coordinates.

\subsection{Preliminary computations} \label{s:prelim}

In what follows we take the convention $u_0=-\infty$. We make the following assumptions on the sequences (roughly illustrated in Figure~\ref{fig:supp2}): for any $i\in\N$,
\begin{align}
	\min(u_i-u_{i-1},u_{i+1}-u_i) & \geq 100 \max(\ell_i,r_i,w_i), \label{e:tailleeps}
\\	w_i & \geq 100r_i. \label{e:viri}
\end{align}
The first inequality means that the cells are well-separated: the distances between points inside a given cell are much smaller than the distance to the closest other cells. The second inequality means that $B_i^+$ is much closer to $C_i^+$ than to $B_i^-$ (and $B_i^-$ is much closer to $C_i^-$ than to~$B_i^+$). We will choose specific sequences $(\ell_i)$, $(r_i)$, $(w_i)$ and $(u_i)$ in Section~\ref{s:unstabW1}. In all our applications, these sequences are bounded (which implies that the probability measure $\rho$ which we construct has compact support).

\begin{figure}[h]
\begin{tikzpicture}

\tikzset{axis/.style = {line width=0.8pt}, tick/.style = {line width=1pt}, bigcircle/.style = {line width=0.9pt}, smallsym/.style = {line width=0.7pt}, lab/.style = {font=\small}}

\draw[dotted,->] (-1,-1) -- (-1,1);

\node[left] at (-1,1) {$x_2$};

\draw[dotted,->] (-2,0) -- (11,0);

\node[below] at (11,0) {$x_1$};

\draw (0,0.3)--(0,-0.3)--(-1,-0.3)--(-1,0.3)--cycle;
\draw (2,0.3)--(2,-0.3)--(3,-0.3)--(3,0.3)--cycle;

\draw (5.5,0.2)--(5.5,-0.2)--(4.8,-0.2)--(4.8,0.2)--cycle;
\draw (6.5,0.2)--(6.5,-0.2)--(7.2,-0.2)--(7.2,0.2)--cycle;

\draw (9,0.1)--(9,-0.1)--(8.7,-0.1)--(8.7,0.1)--cycle;
\draw (9.5,0.1)--(9.5,-0.1)--(9.8,-0.1)--(9.8,0.1)--cycle;

\draw[<->] (1,0) -- (1,0.9) node[midway,left] {$w_i$};
\draw[<->] (1.1,1) -- (1.4,1) node[midway,below] {$r_i$};

\coordinate (A1) at (0,0);
\draw[smallsym] ($(A1)+(-0.08,0.08)$) -- ($(A1)+(0.08,-0.08)$);
\draw[smallsym] ($(A1)+(-0.08,-0.08)$) -- ($(A1)+(0.08,0.08)$);
\node[lab, left=0.5pt] at ($(A1)$) {$A_i^-$};

\coordinate (A1p) at (2,0);
\draw[smallsym] ($(A1p)+(-0.08,0.08)$) -- ($(A1p)+(0.08,-0.08)$);
\draw[smallsym] ($(A1p)+(-0.08,-0.08)$) -- ($(A1p)+(0.08,0.08)$);
\node[lab, right=0.5pt] at ($(A1p)$) {$A_i^+$};

\coordinate (B1) at (1,1);
\draw[smallsym] ($(B1)+(-0.08,0.08)$) -- ($(B1)+(0.08,-0.08)$);
\draw[smallsym] ($(B1)+(-0.08,-0.08)$) -- ($(B1)+(0.08,0.08)$);
\node[lab, above=0.5pt] at ($(B1)$) {$B_i^+$};

\coordinate (B1p) at (1,-1);
\draw[smallsym] ($(B1p)+(-0.08,0.08)$) -- ($(B1p)+(0.08,-0.08)$);
\draw[smallsym] ($(B1p)+(-0.08,-0.08)$) -- ($(B1p)+(0.08,0.08)$);
\node[lab, below=0.5pt] at ($(B1p)$) {$B_i^-$};

\coordinate (C1) at (1.5,1);
\draw[smallsym] ($(C1)+(-0.08,0.08)$) -- ($(C1)+(0.08,-0.08)$);
\draw[smallsym] ($(C1)+(-0.08,-0.08)$) -- ($(C1)+(0.08,0.08)$);
\node[lab, above=0.5pt] at ($(C1)$) {$C_i^+$};

\coordinate (C1p) at (0.5,-1);
\draw[smallsym] ($(C1p)+(-0.08,0.08)$) -- ($(C1p)+(0.08,-0.08)$);
\draw[smallsym] ($(C1p)+(-0.08,-0.08)$) -- ($(C1p)+(0.08,0.08)$);
\node[lab, below=0.5pt] at ($(C1p)$) {$C_i^-$};
\end{tikzpicture}
\caption{The support of the measures $\rho$, $\mu$ and $\nu_i$.} \label{fig:supp2}
\end{figure}

Under assumptions~\eqref{e:tailleeps}--\eqref{e:viri}, we show that for any choice of $C,\alpha>0$ and $p\geq 1$, the inequality
\begin{equation}\label{e:stabmunui}
\norm{T_\mu-T_{\nu_i}}_{L^2(\rho)} \leq CW_p(\mu,\nu_i)^\alpha
\end{equation}
cannot hold for all $\nu_i$ simultaneously. For this, we first compute $T_\mu$ and $T_{\nu_i}$ for any $i$.

For $x=(x_1,\dotsc,x_d)\in\R^d$, let us check that
\begin{equation}\label{e:fauxTmu}
x \mapsto
\begin{cases}
	B_i^+ & \text{if $x\in \mathcal{S}_i$ and $x_2\geq 0$},
\\	B_i^- & \text{if $x\in \mathcal{S}_i$ and $x_2<0$},
\end{cases}
\end{equation}
coincides $\rho$-a.e.\ with $T_\mu$. It is immediate to verify that this application transports $\rho$ to $\mu$. Let us check that each point of the support of $\rho$ is sent by this application to the closest point in the support of $\mu$. Let $x\in \mathcal{S}_i$. We may assume that $x_2\geq 0$ (the case $x_2<0$ is symmetric) and that $x_3=\dots=x_d=0$ because all points in the support of $\mu$ have these coordinates equal to $0$. Then $x$ is closer to $B_i^+$ than to $B_i^-$, and $x$ is at distance at most $\parens[\big]{w_i^2+(\ell_i+w_i)^2}^{1/2}$ from $B_i^+$. Let $y$ be another point in the support of $\mu$, different from $B_i^-$ and $B_i^+$. The triangle inequality and then~\eqref{e:tailleeps} yield
\begin{equation}\label{e:xy}
\abs{x-y} \geq \min(u_i-u_{i-1},u_{i+1}-u_i)-w_i-\ell_i > \parens[\big]{w_i^2+(\ell_i+w_i)^2}^{1/2} \geq \abs{x-B_i^+}
\end{equation}
i.e., $x$ is closer to $B_i^+$ than to $y$. Therefore, the transport is made at smallest possible cost, and~\eqref{e:fauxTmu} is the optimal transport map.

Regarding $T_{\nu_i}$, let us show that it coincides $\rho$-a.e.\ with the application
\begin{equation}\label{e:ersatz}
x \mapsto
\begin{cases}
	T_\mu(x)		& \text{if $x\notin \mathcal{S}_i$},
\\	C_i^+		& \text{if $x\in \mathcal{T}^+(A_i^+,\ell_i,r_i)$},
\\	C_i^-		& \text{if $x\in \mathcal{T}^-(A_i^-,\ell_i,r_i)$}.
\end{cases}
\end{equation}
It is clear that this application defines a transport map from $\rho$ to $\nu_i$. We only need to show the following claim, which implies that~\eqref{e:ersatz} is the optimal transport map from $\rho$ to $\nu_i$.

\begin{enonce*}{Claim}
The application~\eqref{e:ersatz} sends each point $x$ in the support of $\rho$ to its closest point in the support of $\nu_i$.
\end{enonce*}

This claim is straightforward to check for $x\in \mathcal{S}_j$ ($j\neq i$) with a similar argument as in~\eqref{e:xy}. It is also immediate that the closest point to $x\in \mathcal{T}^+(A_i^+,\ell_i,r_i)$ is either $C_i^+$ or $C_i^-$. Let us show that any $x\in \mathcal{T}^+(A_i^+,\ell_i,r_i)$ is closer to $C_i^+$ than to $C_i^-$. We let $x=A_i^++(x_1,\dotsc,x_d)$ and observe that
\[
\begin{split}
\abs{x-C_i^-}^2 - \abs{x-C_i^+}^2
	& = (x_1+w_i+r_i)^2 + (w_i+x_2)^2 - (x_1+w_i-r_i)^2 - (w_i-x_2)^2
\\	& = 4r_i (w_i+x_1)+4w_ix_2
\\	& \geq 4r_iw_i-2r_iw_i
\\	& > 0.
\end{split}
\]
Similarly if $x\in \mathcal{T}^-(A_i^-,\ell_i,r_i)$, then $x$ is closer to $C_i^-$ than to $C_i^+$. Hence the transport is made at smallest possible cost, and~\eqref{e:ersatz} is the optimal transport map.

From the explicit expressions of $T_\mu$ and $T_{\nu_i}$ obtained above, we deduce
\begin{equation}\label{e:exactTmuTnu}
\norm{T_\mu-T_{\nu_i}}^2_{L^2(\rho)} =\frac12 \rho(\mathcal{S}_i)\abs{B_i^+-C_i^+}^2 +\frac12 \rho(\mathcal{S}_i)\abs{B_i^+-C_i^-}^2 = \sigma_i(2r_i^2+4w_i^2)\geq 4\sigma_iw_i^2.
\end{equation}
Besides, due to~\eqref{e:viri}, we have $r_i\leq w_i/100$, hence the optimal coupling between $\mu$ and $\nu_i$ is a coupling where the mass at $B_j^+$ (resp.\ $B_j^-$) does not move if $j\neq i$, and is sent to $C_j^+$ (resp.\ $C_j^-$) if $j=i$. Indeed, in this way, each piece of mass in the support of $\mu$ is sent to the closest point in the support of $\nu_i$. Therefore, for any $p\geq 1$,
\begin{equation}\label{e:exactWp}
W_p(\mu,\nu_i) = r_i \sigma_i^{1/p}.
\end{equation}
Combining~\eqref{e:exactTmuTnu} and~\eqref{e:exactWp} we deduce that for any $\alpha>0$,
\begin{equation}\label{e:impequiv}
\frac{\norm{T_\mu-T_{\nu_i}}^2_{L^2(\rho)}}{W_p(\mu,\nu_i)^{2\alpha}}\geq 4 w_i^{2}r_i^{-2\alpha}\sigma_i^{1-\frac{2\alpha}{p}}.
\end{equation}

\subsection{End of the proof of Theorem~\ref{t:unstableW1}(\ref{t:unstableW1_1})} \label{s:unstabW1}

Let $p\geq 1$ and $\alpha>\frac{p}{2(p+1)}$. We choose the sequences $(\ell_i)_{i\in\N}$, $(r_i)_{i\in\N}$, $(w_i)_{i\in\N}$ and $(u_i)_{i\in\N}$ as follows. We let $r_i=c_02^{-i}$ (again, any sequence with superpolynomial decay would work), $\ell_i=w_i=c_0c_1i^{-2}$ and $u_1\in\R$, $u_{i+1}-u_i=c_0c_2i^{-2}$ where $c_1,c_2>0$ are chosen in a way that~\eqref{e:tailleeps} and~\eqref{e:viri} hold. Choosing appropriately $c_0>0$, we fix
\[
\sum_{i=1}^{+\infty} \ell_i r_i=\frac12
\]
and let $\sigma_i=\ell_ir_i$, which makes $\rho$ uniform over its support. We get
\begin{equation}\label{e:impequiv2}
\frac{\norm{T_\mu-T_{\nu_i}}^2_{L^2(\rho)}}{W_p(\mu,\nu_i)^{2\alpha}}\geq 4w_i^2r_i^{-2\alpha}\sigma_i^{1-\frac{2\alpha}{p}}=Cw_i^{3-\frac{2\alpha}{p}}r_i^{1-\frac{2\alpha}{p}-2\alpha}.
\end{equation}
Since $\alpha>\frac{p}{2(p+1)}$, this quantity tends to $+\infty$ as $i \to +\infty$.

\begin{rema}
We did not obtain any improvement of our results by introducing other parameters in the geometric picture, for instance by making the distance between $A_i^-$ and $A_i^+$ different from the distance between $B_i^-$ and $B_i^+$.
\end{rema}

\begin{rema}\label{r:extgencost}
Theorems~\ref{t:highlyunstable} and~\ref{t:unstableW1}(\ref{t:unstableW1_1}) hold, without any single word changed, for any cost of the form $h\parens[\big]{\abs{x-y}}$ with $h \colon \R_+ \to \R$ increasing and strictly convex (see for instance~\cite[Theorem~1.17]{santambrogio} for the existence of optimal transport maps in this context). The families of optimal transport maps used to prove Theorems~\ref{t:highlyunstable} and~\ref{t:unstableW1}(\ref{t:unstableW1_1}) are indeed optimal with respect to any cost of this form, since they are obtained by sending each point in the support of the source measure into the closest point (in Euclidean distance) in the support of the target measure.
\end{rema}

\subsection{Proof of Theorem~\ref{t:unstableW1}(\ref{t:unstableW1_2})} \label{s:infinitelycompo}

We use a truncation argument similar to the one in~\cite[Section~2.2]{letrouitmerigot}. For any $i\in\N$, we consider $\rho_i^\pm$ the restriction of $\rho$ to $\mathcal{T}^\pm(A_i^\pm,\ell_i,r_i)$. We denote by $\tilde{\rho}_i^\pm=\rho_i^\pm/\rho_i^\pm\parens[\big]{\mathcal{T}^\pm(A_i^\pm,\ell_i,r_i)}$ the associated probability density. According to~\cite[Theorem~2.1]{letrouitmerigot}, for any $\psi_0,\psi_1\in\mathcal{C}^0(\mathcal{Y})$,
\[
\Var_{\tilde{\rho}_i^\pm} (\psi_1^*-\psi_0^*)\leq C_{\mathcal{Y}} \braket[\big]{\psi_1-\psi_0}{\nabla \psi_{0\#}^*\tilde{\rho}_i^\pm- \nabla \psi_{1\#}^*\tilde{\rho}_i^\pm}.
\]
Importantly, $C_{\mathcal{Y}}$ does not depend on $i$. Let $\phi_\mu$ (resp.\ $\phi_\nu$) denote a Brenier potential for the quadratic optimal transport problem from $\rho$ to $\mu$ (resp.\ $\rho$ to $\nu$). Applying the above inequality to $\psi_0=\phi_\mu^*$ and $\psi_1=\phi_\nu^*$, and multiplying by $\rho_i^\pm\parens[\big]{\mathcal{T}^\pm(A_i^\pm,\ell_i,r_i)}$ on both sides, we obtain
\begin{equation}\label{e:stabineachi}
\rho_i^\pm\parens[\big]{\mathcal{T}^\pm(A_i^\pm,\ell_i,r_i)}\Var_{\tilde{\rho}_i^\pm} (\phi_\mu-\phi_\nu)\leq C_{\mathcal{Y}} \braket[\big]{\phi_\nu^*-\phi_\mu^*}{\nabla \phi_{\mu\#}\rho_i^\pm- \nabla \phi_{\nu\#}\rho_i^\pm}.
\end{equation}
Let $c_i^\pm=\int_\X (\phi_\mu - \phi_\nu) \dd \tilde{\rho}_i^\pm$. For $k\in\N$, let also $\rho_k$ be the probability measure whose density is proportional to $\sum_{i=1}^k (\rho_i^+ + \rho_i^-)$, and write $b_k\rho_k=\sum_{i=1}^k (\rho_i^+ + \rho_i^-)$ for some $b_k\in[0,1]$. Finally, let $\mu_k=\nabla \phi_{\mu\#}\rho_k$ and $\nu_k=\nabla\phi_{\nu\#}\rho_k$. In the sequel we assume that $k$ is large enough so that $b_k\geq 1/2$. Then summing the inequality~\eqref{e:stabineachi} over $i$ and over $\{+,-\}$ we deduce
\begin{equation}\label{e:rugby}
\sum_{i=1}^k \sum_{\{+,-\}}\norm{\phi_\mu-\phi_\nu-c_i^\pm}_{L^2(\rho_i^\pm)}^2 \leq C_{\mathcal{Y}} \braket[\big]{\phi_\nu^*-\phi_\mu^*}{\mu_k- \nu_k} \leq C_{\rho,\mathcal{Y}}W_1(\mu_k,\nu_k)
\end{equation}
where in the last inequality we used Kantorovich--Rubinstein duality and the fact that $\phi_\mu^*$~and~$\phi_\nu^*$ are Lipschitz, since $\X$ (the support of $\rho$) is bounded. To get stability of maps we apply~\cite[Proposition~4.1]{delmer} in $\mathcal{T}^\pm(A_i^\pm,\ell_i,r_i)$, which gives
\[
\norm{T_\mu-T_\nu}_{L^2(\rho_i^\pm)}^2 \leq C_{\rho,\mathcal{Y}}\norm{\phi_\mu-\phi_\nu-c_i^\pm}_{L^2(\rho_i^\pm)}^{2/3}
\]
where again $C_{\rho,\mathcal{Y}}$ does not depend on $i$. Summing over $i$ and $\{+,-\}$ we obtain
\begin{equation}\label{e:nwzel}
\begin{split}
\norm{T_\mu-T_\nu}_{L^2(\rho_k)}^2
	& \leq C_{\rho,\mathcal{Y}}\sum_{i=1}^k\sum_{\{+,-\}}\norm{\phi_\mu-\phi_\nu-c_i^\pm}_{L^2(\rho_i^\pm)}^{2/3}
\\	& \leq C_{\rho,\mathcal{Y}}k^{2/3}W_1(\mu_k,\nu_k)^{1/3}
\end{split}
\end{equation}
where we used that $\sum_{i=1}^k a_i \leq k^{2/3}\parens[\big]{\sum_{i=1}^k a_i^3}^{1/3}$ for any sequence $(a_i)$ of non-negative numbers, together with~\eqref{e:rugby}.

To find an upper bound on $W_1(\mu_k,\nu_k)$ we proceed as follows. We write $\mu=b_k\mu_k+\mu_k'$ where $b_k$ is introduced below~\eqref{e:stabineachi}. Thanks to the Kantorovich--Rubinstein duality formula we know there exists a $1$-Lipschitz function $g$ such that $W_1(\mu_k,\mu)=\int_{\Y} g \dd (\mu_k-\mu)$, and we may assume that $g(0)=0$. Hence $\abs[\big]{g(x)}\leq R_{\mathcal{Y}} \coloneqq \sup_{y\in\Y}\abs{y}$ for any $x\in \mathcal{Y}$. Since $\mu_k'(\mathcal{Y})=1-b_k$ we deduce
\[
W_1(\mu_k,\mu)= (1-b_k)\int_{\mathcal{Y}} g  \dd \mu_k-\int_{\Y}g \dd \mu_k'\leq 2R_{\Y}(1-b_k).
\]
Similarly, $W_1(\nu_k,\nu)\leq 2R_{\Y}(1-b_k)$. Thus
\[
W_1(\mu_k,\nu_k)\leq W_1(\mu,\nu)+W_1(\mu,\mu_k)+W_1(\nu,\nu_k)\leq W_1(\mu,\nu)+4R_\Y (1-b_k).
\]
By construction, there holds $1-b_k\leq C_\rho 2^{-k}$ for some $C_\rho>0$ independent of $k$. Plugging into~\eqref{e:nwzel} and using that $T_\mu$ and $T_\nu$ are essentially bounded by $R_\Y$ on $\X$, we get
\[
\norm{T_\mu-T_\nu}_{L^2(\rho)}^2\leq C_{\rho,\mathcal{Y}}k^{2/3}\parens[\big]{W_1(\mu,\nu)+2^{-k}}^{1/3}+C_{\rho,\mathcal{Y}}2^{-k}.
\]
To establish~\eqref{e:stabmapjohn2} we may assume that $W_1(\mu,\nu)\leq 1/10$, since otherwise the boundedness of $T_\mu, T_\nu$ allows us to conclude. Choosing $k = \floor[\big]{-\log_2 W_1(\mu,\nu)}$, we get
\[
\norm{T_\mu-T_\nu}_{L^2(\rho)}\leq C_{\rho,\Y} W_1(\mu,\nu)^{1/6}\abs[\big]{\log W_1(\mu,\nu)}^{1/3}
\]
which implies~\eqref{e:stabmapjohn2}.

\begin{rema}
It is unlikely that strong instability as in Theorem~\ref{t:highlyunstable} can be obtained for some $\rho$ bounded above and below on a well-chosen bounded open set $\X$: due to~\cite[Remark~4.2]{letrouitmerigot}, $\X$ would need to have infinitely many connected components, and the above computations show that even in this case~\eqref{e:stabmap2} holds for some $C,\alpha>0$ and $p=1$, as soon as the size of the connected components decays fast enough.
\end{rema}

\section*{Acknowledgments}

I am very thankful to an anonymous referee for a suggestion that led to part~(\ref{t:unstableW1_2}) of Theorem~\ref{t:unstableW1}.

\printCOI

\printbibliography

\end{document}