% thesis.tex

% uWaterloo Thesis Template for LaTeX 
% Last Updated May 24, 2011 by Stephen Carr, IST Client Services
% FOR ASSISTANCE, please send mail to rt-IST-CSmathsci@ist.uwaterloo.ca

% Effective October 2006, the University of Waterloo 
% requires electronic thesis submission. See the uWaterloo thesis regulations at
% http://www.grad.uwaterloo.ca/Thesis_Regs/thesistofc.asp.

% DON'T FORGET TO ADD YOUR OWN NAME AND TITLE in the "hyperref" package
% configuration below. THIS INFORMATION GETS EMBEDDED IN THE FINAL PDF DOCUMENT.
% You can view the information if you view Properties of the PDF document.

% Many faculties/departments also require one or more printed
% copies. This template attempts to satisfy both types of output. 
% It is based on the standard "book" document class which provides all necessary 
% sectioning structures and allows multi-part theses.

% DISCLAIMER
% To the best of our knowledge, this template satisfies the current uWaterloo requirements.
% However, it is your responsibility to assure that you have met all 
% requirements of the University and your particular department.
% Many thanks to the feedback from many graduates that assisted the development of this template.

% -----------------------------------------------------------------------

% By default, output is produced that is geared toward generating a PDF 
% version optimized for viewing on an electronic display, including 
% hyperlinks within the PDF.
 
% E.g. to process a thesis called "mythesis.tex" based on this template, run:

% pdflatex mythesis	-- first pass of the pdflatex processor
% bibtex mythesis	-- generates bibliography from .bib data file(s) 
% pdflatex mythesis	-- fixes cross-references, bibliographic references, etc
% pdflatex mythesis	-- fixes cross-references, bibliographic references, etc

% If you use the recommended LaTeX editor, Texmaker, you would open the mythesis.tex
% file, then click the pdflatex button. Then run BibTeX (under the Tools menu).
% Then click the pdflatex button two more times. If you have an index as well,
% you'll need to run MakeIndex from the Tools menu as well, before running pdflatex
% the last two times.

% N.B. The "pdftex" program allows graphics in the following formats to be
% included with the "\includegraphics" command: PNG, PDF, JPEG, TIFF
% Tip 1: Generate your figures and photos in the size you want them to appear
% in your thesis, rather than scaling them with \includegraphics options.
% Tip 2: Any drawings you do should be in scalable vector graphic formats:
% SVG, PNG, WMF, EPS and then converted to PNG or PDF, so they are scalable in
% the final PDF as well.
% Tip 3: Photographs should be cropped and compressed so as not to be too large.

% To create a PDF output that is optimized for double-sided printing: 
%
% 1) comment-out the \documentclass statement in the preamble below, and
% un-comment the second \documentclass line.
%
% 2) change the value assigned below to the boolean variable
% "PrintVersion" from "false" to "true".

% --------------------- Start of Document Preamble -----------------------

% Specify the document class, default style attributes, and page dimensions
% For hyperlinked PDF, suitable for viewing on a computer, use this:
\documentclass[letterpaper,12pt,titlepage,oneside,final]{book}
 
% For PDF, suitable for double-sided printing, change the PrintVersion variable below
% to "true" and use this \documentclass line instead of the one above:
%\documentclass[letterpaper,12pt,titlepage,openright,twoside,final]{book}

% Some LaTeX commands I define for my own nomenclature.
% If you have to, it's better to change nomenclature once here than in a 
% million places throughout your thesis!
% \package{name}: typesets a package name in bold text.
\newcommand{\package}[1]{\textbf{#1}} % package names in bold text
% \cmmd{name}: typesets a command name as a backslash followed by the name in tt font.
\newcommand{\cmmd}[1]{\textbackslash\texttt{#1}} % command name in tt font 
% \href stub: prints its single argument unchanged. It is only in effect until
% hyperref is loaded further down in this preamble.
% NOTE(review): this stub takes ONE argument, whereas hyperref's \href takes
% two (URL, text); if hyperref were ever removed, \href{url}{text} would print
% both the URL and the text -- confirm whether [2]{#2} is what is wanted.
\newcommand{\href}[1]{#1} % does nothing, but defines the command so the
    % print-optimized version will ignore \href tags (redefined by hyperref pkg).
%\newcommand{\texorpdfstring}[2]{#1} % does nothing, but defines the command
% Anything defined here may be redefined by packages added below...

% This package allows if-then-else control structures.
\usepackage{ifthen}
\newboolean{PrintVersion}
\setboolean{PrintVersion}{false} 
% CHANGE THIS VALUE TO "true" as necessary, to improve printed results for hard copies
% by overriding some options of the hyperref package below.

\usepackage{nomencl} % For a nomenclature (optional; available from ctan.org)
\usepackage{amsmath,amssymb,amstext} % Lots of math symbols and environments
\usepackage[pdftex]{graphicx} % For including graphics N.B. pdftex graphics driver 

% Hyperlinks make it very easy to navigate an electronic document.
% In addition, this is where you should specify the thesis title
% and author as they appear in the properties of the PDF document.
% Use the "hyperref" package 
% N.B. HYPERREF MUST BE THE LAST PACKAGE LOADED; ADD ADDITIONAL PKGS ABOVE
\usepackage[pdftex,letterpaper=true,pagebackref=false]{hyperref} % with basic options
		% N.B. pagebackref=true provides links back from the References to the body text. This can cause trouble for printing.
\hypersetup{
    plainpages=false,       % needed if Roman numbers in frontpages
    pdfpagelabels=true,     % adds page number as label in Acrobat's page count
    bookmarks=true,         % show bookmarks bar?
    unicode=false,          % non-Latin characters in Acrobat’s bookmarks
    pdftoolbar=true,        % show Acrobat’s toolbar?
    pdfmenubar=true,        % show Acrobat’s menu?
    pdffitwindow=false,     % window fit to page when opened
    pdfstartview={FitH},    % fits the width of the page to the window
    pdftitle={Biologically Inspired Adaptive Control of Quadcopter Flight},    % title
    pdfauthor={Brent Komer},    % author
%    pdfsubject={Subject},  % subject: CHANGE THIS TEXT! and uncomment this line
%    pdfkeywords={keyword1} {key2} {key3}, % list of keywords, and uncomment this line if desired
    pdfnewwindow=true,      % links in new window
    colorlinks=true,        % false: boxed links; true: colored links
    linkcolor=blue,         % color of internal links
    citecolor=green,        % color of links to bibliography
    filecolor=magenta,      % color of file links
    urlcolor=cyan           % color of external links
}
% Print version only: override the link colours chosen in \hypersetup above so
% that every kind of link (internal, citation, file, URL) is typeset in black.
% The "colorlinks" key itself is deliberately left untouched.
\ifthenelse{\boolean{PrintVersion}}%
  {\hypersetup{citecolor=black,filecolor=black,linkcolor=black,urlcolor=black}}%
  {}% screen version: keep the colours set above (no else branch)

% Setting up the page margins...
% uWaterloo thesis requirements specify a minimum of 1 inch (72pt) margin at the
% top, bottom, and outside page edges and a 1.125 in. (81pt) gutter
% margin (on binding side). While this is not an issue for electronic
% viewing, a PDF may be printed, and so we have the same page layout for
% both printed and electronic versions, we leave the gutter margin in.
% Set margins to minimum permitted by uWaterloo thesis regulations:
% No margin notes are used, so both margin-note lengths are set to zero.
\setlength{\marginparwidth}{0pt} % width of margin notes
% N.B. If margin notes are used, you must adjust \textwidth, \marginparwidth
% and \marginparsep so that the space left between the margin notes and page
% edge is less than 15 mm (0.6 in.)
\setlength{\marginparsep}{0pt} % width of space between body text and margin notes
% The side-margin lengths below are offsets added to LaTeX's built-in 1 in.
% left margin.
% \evensidemargin is the LEFT margin of even-numbered (verso) pages, which is
% the OUTSIDE edge when the "twoside" option is selected, so no extra space is
% added there. The binding side of those pages is the right margin, which is
% 8.5in - 1in - 6.375in = 1.125in. (This length is ignored with "oneside".)
\setlength{\evensidemargin}{0in}
% \oddsidemargin is the left margin of all pages when "oneside" printing is
% selected, and of odd-numbered (recto) pages when "twoside" is selected. In
% both cases that is the binding side, so 1/8 in. is added to give 1.125 in.
\setlength{\oddsidemargin}{0.125in}
\setlength{\textwidth}{6.375in} % assuming US letter paper (8.5 in. x 11 in.) and 
% side margins as above
\raggedbottom

% The following statement specifies the amount of space between
% paragraphs. Other reasonable specifications are \bigskipamount and \smallskipamount.
\setlength{\parskip}{\medskipamount}

% The following statement controls the line spacing.  The default
% spacing corresponds to good typographic conventions and only slight
% changes (e.g., perhaps "1.2"), if any, should be made.
\renewcommand{\baselinestretch}{1} % this is the default line space setting

% By default, each chapter will start on a recto (right-hand side)
% page.  We also force each section of the front pages to start on 
% a recto page by inserting \cleardoublepage commands.
% In many cases, this will require that the verso page be
% blank and, while it should be counted, a page number should not be
% printed.  The following statements ensure a page number is not
% printed on an otherwise blank verso page.
% Save the class's own \cleardoublepage under a new name before replacing it.
\let\origdoublepage\cleardoublepage
% Replacement command: end the current page, then run the saved original
% inside a brace group with the "empty" page style selected, so that the
% page-style change stays local to that group.
\newcommand{\clearemptydoublepage}{%
  \clearpage{\pagestyle{empty}\origdoublepage}}
% From this point on, \cleardoublepage means the replacement defined above.
\let\cleardoublepage\clearemptydoublepage

% Allow chapter references to be capitalized:
% \Chapref{label} prints "Chapter" followed by the number of the given label,
% joined by a tie (~) so the two are not split across lines.
\newcommand{\Chapref}[1]{Chapter~\ref{#1}}

% \Tabref{label}: same as \Chapref, but with a "Table" prefix.
\newcommand{\Tabref}[1]{Table~\ref{#1}}

% Allow subfigures
\usepackage{subcaption}

% Allow multi-references
\usepackage{cleveref}

% Allow descriptions under tables
\usepackage[flushleft]{threeparttable}

% Allow symbol for degrees
\usepackage{gensymb}

% For tables to be able to fill the whole text width
\usepackage{tabularx}

% Include a Glossary
%\usepackage[acronym]{glossaries}

%\makeglossaries

%\newglossaryentry{Nengo}{name=Nengo,description={blabla}}
%\newglossaryentry{decoder}{name=Decoders,description={blablazzzz}}

% Find the figures stored in this folder.
% N.B. \graphicspath takes a LIST of directories: each directory must be in
% its own brace group and end with "/". With only a single pair of braces the
% argument is not read as one directory, so the folder is never searched.
\graphicspath{{./figures/}}

% Uncomment these to remove all figures and tables for a print version for editing
%\usepackage{comment}
%\excludecomment{figure}
%\excludecomment{table}
%\let\endfigure\relax
%\let\endtable\relax


%======================================================================
%   L O G I C A L    D O C U M E N T -- the content of your thesis
%======================================================================
\begin{document}

% For a large document, it is a good idea to divide your thesis
% into several files, each one containing one chapter.
% To illustrate this idea, the "front pages" (i.e., title page,
% declaration, borrowers' page, abstract, acknowledgements,
% dedication, table of contents, list of tables, list of figures,
% nomenclature) are contained within the file "thesis-frontpgs.tex" which is
% included into the document by the following statement.
%----------------------------------------------------------------------
% FRONT MATERIAL
%----------------------------------------------------------------------
\input{thesis-frontpgs} 

%----------------------------------------------------------------------
% MAIN BODY
%----------------------------------------------------------------------
% Because this is a short document, and to reduce the number of files
% needed for this template, the chapters are not separate
% documents as suggested above, but you get the idea. If they were
% separate documents, they would each start with the \chapter command, i.e, 
% do not contain \documentclass or \begin{document} and \end{document} commands.
%======================================================================
\chapter{Introduction}
%======================================================================

\section{Motivation}

%[talk about humans being a very good control system, want to build a robotic system that leverages some of the abilities humans have, by building it with a biologically realistic neural simulator. Can also be run on low power neural hardware in the future]

Humans have an exceptional ability to be able to adapt to their surroundings.
In particular the human motor control system is able to compensate for changes in forces, torques, and inertial effects on the body.
For example, when picking up an object such as a hammer, the weight of the hammer will apply external forces to the hand.
This will change the dynamic properties of the hand and arm movements, yet the human motor control system is able to easily compensate for these changes and accurately control movement with the object.
Even if an object has never been encountered before, the human brain is able to calculate the correct changes in timing and muscle tensions in order to skilfully manipulate the object.
The predictive capabilities of the brain, along with the plasticity of neural connections in the motor area help guide these sophisticated behaviours.


This ability for quick and easy adaptation to new dynamic properties of a system would be extremely useful in robotics. 
Applying similar methods of control that have been developed over millions of years of evolution in the brain to a robotic control system could result in major improvements. 
This is especially useful now that the demands of many robotic systems are more general purpose than they were in the past. 
Robots started out mainly performing simple and repetitive tasks in stable environments, such as automation in manufacturing \cite{garcia2007evolution}. 
Now they are being used increasingly in more complex situations requiring a diverse amount of control, such as search and rescue missions, performing medical procedures, and assisting the elderly \cite{garcia2007evolution, hockstein2007history, nourbakhsh2005human, lacey1998application}. 
When the precise environment that the robot will operate in is not fully known, it is useful for any control system that the robot uses to be adaptable to those environments.

Another advantage the brain has when it comes to control is that it uses very little power, about 20 Watts on average \cite{hart1975brain}.
% the motor is the majority of the power on a quadcopter, so neuromorphic hardware might not help much here
Hardware inspired by the brain is being designed to take advantage of this low power paradigm. 
This style of hardware, known as neuromorphic hardware, is typically massively parallel and consumes much less power than traditional hardware.
The algorithms explored in this thesis focus on being adaptive and biologically inspired in order to take advantage of such hardware.

\section{Quadcopters}

%[basic characteristics, four rotors, rotating different ways]
%[something about vertical take off and landing]
%[light-weight]
%[very maneuverable]

Quadcopters are very versatile aerial vehicles, typically consisting of a central body with four upright rotors equally spaced around the body. 
This configuration allows them to be lightweight and simplifies construction. 
They also have the ability to take off and land vertically without external assistance, meaning they can be used in a lot more situations than aircraft that require a runway or other particular conditions to take off. 
They can also change directions fairly easily mid flight, and have the ability to hover at a particular location. Due to their low cost, light weight, and ease of use, they are an excellent aircraft to use for research purposes.

%[battery heaviest part, low power controller means it can last longer and/or be lighter, good candidate for neural control]
Despite all of these positive qualities, one of the main weaknesses of quadcopters is their short battery life. 
Small and simple quadcopters used by hobbyists typically last only 5 to 10 minutes before they run out of power \cite{batterylife}. 
The more expensive industrial grade quadcopters typically last up to 30 minutes before they need to be recharged, but this duration is still not long enough for some applications. 
For example, if a quadcopter is being used in a search and rescue mission it may need to remain flying for a long time without recharging, especially if it is operating in a remote area. 
Battery weight is one of the main issues hindering the maximum flight time of quadcopters. 
If a larger battery is used in attempts to increase the maximum flight time, the quadcopter will be heavier and therefore require more power to fly, which in turn will drain the battery faster. 
Two possible solutions to this problem are lighter batteries and more energy-efficient operation.
The latter can be achieved by the use of neuromorphic hardware to run the flight control system.
While the majority of the power consumed by a quadcopter goes towards the rotors, some is still used by the on-board control system.
As the sophistication of the control system increases, the computational demands will follow, leading to less overall flight time.
Computational efficiency improvements in traditional digital computation are beginning to stagnate and are expected to soon approach a limit where minimal improvement is expected.
On the contrary, computational power efficiency for biological systems is 8-9 orders of magnitude better than the power efficiency for digital computation \cite{hasler2013finding}.
Reducing the power required for the control system can allow more of the power from the batteries to be used for the rotors.

%TODO this seems like a really weak claim, and isn't the main focus of the thesis, should it still be included here as a possibility?

%[something about how adaptive control is awesome]
In addition to its low power consumption, neuromorphic hardware also has the advantage of using various types of neural algorithms that can be extremely useful for control.
In particular, the learning capabilities of brain-like algorithms can be used to develop controllers that are able to adapt to unknown environments with non-linear dynamics. 
This thesis explores an adaptive control algorithm for quadcopter flight constructed using simulated biologically plausible neurons.

%[talk about what each section of the thesis will talk about]
\Chapref{chap:background} gives an overview of how quadcopters are able to fly, followed by a section detailing the mathematics underlying the dynamics of a quadcopter system. 
Following the mathematical characterization is a description of how the quadcopter is modelled in a simulation environment.
\Chapref{chap:control} gives an overview of the control theory used in developing the control system, including standard PID control, adaptive control, and adaptive control in a simulated biological neural system. 
\Chapref{chap:implementation} covers the initial design of the controller model, as well as many of the iterations of improvements leading to the final design.
\Chapref{chap:analysis} describes the set of experiments performed and metrics used to quantify performance of the various controllers. 
The final section discusses the results and outlines areas of future work.

%======================================================================
\chapter{Background Information} \label{chap:background}
%======================================================================

\section{Flight Control}

%[mention the six states: x,y,z,roll,pitch,yaw -> have a diagram to explain them]
%[different modes of flight control (up/down, horizontal, rotate, hover), diagrams for each]
%[thrust is always perpendicular to the body]
%[two ways to do it, rotor aligned axis, and 45 degrees from that]

The system state of a quadcopter is six dimensional: three for position ($x$, $y$, and $z$) and three for orientation (roll, pitch, and yaw). 
There are four control inputs, which are typically taken to be the rotational velocity of each of the four rotors. 
For a physical quadcopter, these inputs are the voltages applied to each of these rotors.
A transformation in these voltages can be obtained using the physical rotor parameters to estimate the rotor velocity.
For simplicity of the simulation, the velocity is used directly as the control input in this thesis. 
%TODO [continue...]

%[talk about perpendicular thrust, as well as torque on the body, and that is how each of the movements is done]
The rotors will always generate a thrust that is perpendicular to the body of the aircraft. 
In addition to this thrust, torque is generated by each rotor based on its spin direction as well as its distance from the center of the body.
For the quadcopter to be able to control its orientation, half of the rotors spin clockwise and the other half spin counter-clockwise. 
The pairs diagonally opposite each other across the body spin in the same direction, allowing the torques to balance one another out and the quadcopter to maintain a steady orientation. 
By varying the relative speeds of each rotor, the quadcopter is able to create a net torque in a specific direction, causing a rotation about any axis.

A quadcopter performs four common actions to move around in its environment, with a distinct pattern of rotor actuation for each one: 
tilt forward/backward, tilt left/right, rotate, and move up/down. 
A quadcopter can perform any combination of these actions, and with different magnitudes of each. The relative rotor speeds required for each are shown in \autoref{fig:actions}. This set of actions makes up what is referred to as the `task space' of the quadcopter in the remainder of this thesis.

\begin{figure}
\centering
\includegraphics[height=0.3\textheight]{./figures/QuadcopterMovements.JPG}
\caption{Four Primary Quadcopter Movements. Taken from \cite{harsha}}
\label{fig:actions}
\end{figure}


\section{Dynamics} \label{sec:dynamics}

%[go through all of the equations for dynamics]

The structure of a quadcopter is shown in \autoref{fig:FBD}, along with the coordinate frame and relevant forces and torques created by the rotors.
%TODO [fix this phrase a ton] 
Two important frames of reference are involved when studying quadcopters: the inertial frame is used to represent the state of the quadcopter relative to the outside world, and the body frame is fixed around the center of mass of the quadcopter and used to represent local effects on the quadcopter.
%Typically you are interested in the state of the quadcopter in the inertial frame, but the control of the quadcopter is easier to represent in the body frame.
Typically, the inertial frame is the focus when researchers are interested in the state of the quadcopter, but the body frame is more useful when considering control of the quadcopter.

%[put diagram around here]
\begin{figure}
\centering
\includegraphics[width=0.8\textwidth]{./figures/QuadcopterFBD.png}
\caption{The Inertial and Body Frames of a Quadcopter. Taken from \cite{luukkonen2011modelling} } %TODO get a better caption
\label{fig:FBD}
\end{figure}

A rotation matrix \eqref{eq:rotation_matrix} can be used to convert coordinates from the body frame to the inertial frame.
Here, $C_{x}$ represents $\cos{x}$ and $S_{x}$ represents $\sin{x}$.
To convert the other way, from inertial frame to body frame, the inverse of the matrix is used. 
Since this matrix is orthogonal, its inverse is equal to its transpose.

%[rotation matrix here]
\begin{equation} \label{eq:rotation_matrix}
R = 
\begin{bmatrix}
C_{\psi}C_{\theta} & C_{\psi}S_{\theta}S_{\phi} - S_{\psi}C_{\phi} & C_{\psi}S_{\theta}C_{\phi} + S_{\psi}S_{\phi} \\
S_{\psi}C_{\theta} & S_{\psi}S_{\theta}S_{\phi} + C_{\psi}C_{\phi} & S_{\psi}S_{\theta}C_{\phi} - C_{\psi}S_{\phi} \\
-S_{\theta} & C_{\theta}S_{\phi} & C_{\theta}C_{\phi}
\end{bmatrix}
\end{equation}

% %TODO I don't use these matrices anywhere, so there probably isn't a need to include them
%The transformation matrices for the angular velocities between the inertial and body frames are shown in \eqref{eq:global_angular_velocity} and \eqref{eq:local_angular_velocity}.

%%[matrices here]
%\begin{equation} \label{eq:global_angular_velocity}
%\dot{\eta} = W_{\eta}^{-1}\nu, 
%\begin{bmatrix}
%\dot{\phi} \\
%\dot{\theta} \\
%\dot{\psi}
%\end{bmatrix}
%=
%\begin{bmatrix}
%1 & S_{\phi}T_{\theta} & C_{\phi}T_{\theta} \\
%0 & C_{\phi} & -S_{\phi} \\
%0 & S_{\phi}/C_{\theta} & C_{\phi}/C_{\theta}
%\end{bmatrix}
%\begin{bmatrix}
%p \\
%q \\
%r
%\end{bmatrix}
%\end{equation}

%\begin{equation} \label{eq:local_angular_velocity}
%\nu = W_{\eta}\dot{\eta},
%\begin{bmatrix}
%p \\
%q \\
%r
%\end{bmatrix}
%=
%\begin{bmatrix}
%1 & 0 & -S_{\theta} \\
%0 & C_{\phi} & C_{\theta}S_{\phi} \\
%0 & -S_{\phi} & C_{\theta}C_{\phi}
%\end{bmatrix}
%\begin{bmatrix}
%\dot{\phi} \\
%\dot{\theta} \\
%\dot{\psi}
%\end{bmatrix}
%\end{equation}

The quadcopter is assumed to be symmetrical with equal length arms for each rotor. 
Aligning the arms along the body's $x$ and $y$ axes gives the diagonal inertia matrix in \eqref{eq:inertia_matrix}. 
Due to symmetry, $I_{xx} = I_{yy}$.

%[put inertial matrix here]
\begin{equation} \label{eq:inertia_matrix}
\begin{bmatrix}
I_{xx} & 0 & 0 \\
0 & I_{yy} & 0 \\
0 & 0 & I_{zz}
\end{bmatrix}
\end{equation}

The angular velocity of each rotor generates a thrust force perpendicular to the body frame, as well as a torque about the rotor axis. 
This relationship is shown in \eqref{eq:rotor_force} and \eqref{eq:rotor_torque}, 
where $\omega_{i}$ is the angular velocity of the $i$th rotor, $k$ is the lift constant, $b$ is the drag constant, and $I_{M}$ is the rotor's moment of inertia.

%[equations here]
\begin{equation} \label{eq:rotor_force}
f_{i} = k\omega_{i}^{2}
\end{equation}
\begin{equation} \label{eq:rotor_torque}
\tau_{M_{i}} = b\omega_{i}^{2} + I_{M}\dot{\omega}_{i}
\end{equation}

Typically the effect of the angular acceleration is considered small and is omitted in most analyses, so it will not be present in the remainder of the dynamics derivations. This is because during steady state flight the rotors will be maintaining a constant (or almost constant) velocity and will have approximately zero acceleration \cite{gibiansky}.

Note that the forces and torques generated are always proportional to the square of the rotor's angular velocity, thus working with this term instead of the angular velocity itself is simpler. %[put this part under simulation instead]?
The set of values making up the square of the rotor angular velocities is referred to as the `rotor space' in the remainder of this thesis.

Combining the forces of all four rotors leads to \eqref{eq:thrust}, where $T$ is the thrust in the direction of the z-axis of the body. 
Combining the torques of all four rotors leads to \eqref{eq:torque}, where the vector $\tau_{B}$ represents the torques across each of the principal body axes ($\tau_{\phi}$ for roll, $\tau_{\theta}$ for pitch, and $\tau_{\psi}$ for yaw). 
The distance between the rotor and the center of mass of the quadcopter is denoted by $l$. 

%[put the two equations here]
\begin{equation} \label{eq:thrust}
T = \sum_{i=1}^{4} f_{i} = k\sum_{i=1}^{4} \omega_{i}^{2}, \quad T_{B} = 
\begin{bmatrix}
0 \\
0 \\
T
\end{bmatrix}
\end{equation}
\begin{equation} \label{eq:torque}
\tau_{B} = 
\begin{bmatrix}
\tau_{\phi} \\
\tau_{\theta} \\
\tau_{\psi}
\end{bmatrix}
=
\begin{bmatrix}
lk(-\omega_{2}^{2}+\omega_{4}^{2}) \\
lk(-\omega_{1}^{2}+\omega_{3}^{2}) \\
\sum_{i=1}^{4}\tau_{M_{i}}
\end{bmatrix}
\end{equation}

The governing equation for translational motion is \eqref{eq:translation}, where $\xi$ is the linear position vector, $T_{B}$ is the thrust from \eqref{eq:thrust}, $R$ is the rotation matrix from \eqref{eq:rotation_matrix}, $A(\dot{\xi})$ is a matrix containing the drag force, and $G$ is the gravitational force.

\begin{equation} \label{eq:translation}
m\ddot{\xi} = RT_{B} - A(\dot{\xi})\dot{\xi} - G
\end{equation}

Expanding \eqref{eq:translation} and setting the equation in terms of translational acceleration leads to \eqref{eq:translation_expanded}.

\begin{equation} \label{eq:translation_expanded}
\begin{bmatrix}
\ddot{x} \\
\ddot{y} \\
\ddot{z}
\end{bmatrix} 
=
\dfrac{T}{m}
\begin{bmatrix}
C_{\psi}S_{\theta}C_{\phi} + S_{\psi}S_{\phi} \\
S_{\psi}S_{\theta}C_{\phi} - C_{\psi}S_{\phi} \\
C_{\theta}C_{\phi}
\end{bmatrix}
-
\dfrac{1}{m}
\begin{bmatrix}
A_{x}|\dot{x}| & 0 & 0 \\
0 & A_{y}|\dot{y}| & 0 \\
0 & 0 & A_{z}|\dot{z}|
\end{bmatrix}
\begin{bmatrix}
\dot{x} \\
\dot{y} \\
\dot{z}
\end{bmatrix}
-
\begin{bmatrix}
0 \\
0 \\
g
\end{bmatrix}
\end{equation}

The governing equation for rotational motion is \eqref{eq:rotation}, where $\eta$ is the Euler angle vector, $I$ is the inertia matrix from \eqref{eq:inertia_matrix}, $\tau_{B}$ is the applied torque from \eqref{eq:torque}, and $C(\dot{\eta})\dot{\eta}$ is the centripetal force. 

\begin{equation} \label{eq:rotation}
I\ddot{\eta} = C(\dot{\eta})\dot{\eta} + \tau_{B}
\end{equation}

Expanding \eqref{eq:rotation} and setting the equation in terms of angular acceleration leads to \eqref{eq:rotation_expanded}.

\begin{equation} \label{eq:rotation_expanded}
\begin{bmatrix}
\ddot{\phi} \\
\ddot{\theta} \\
\ddot{\psi}
\end{bmatrix}
=
\begin{bmatrix}
(I_{yy} - I_{zz})\dot{\theta}\dot{\psi}/I_{xx} \\
(I_{zz} - I_{xx})\dot{\phi}\dot{\psi}/I_{yy} \\
(I_{xx} - I_{yy})\dot{\phi}\dot{\theta}/I_{zz}
\end{bmatrix}
+
\begin{bmatrix}
\tau_{\phi}/I_{xx} \\
\tau_{\theta}/I_{yy} \\
\tau_{\psi}/I_{zz}
\end{bmatrix}
\end{equation}

Equations \eqref{eq:translation_expanded} and \eqref{eq:rotation_expanded} are used throughout the remainder of this thesis to model the dynamics of the quadcopter during flight.

\section{Simulation}

%[maybe move this section later in the thesis, possibly put pure python sim here and v-rep sim elsewhere (with implementation?)]
%[talk about V-REP, throw in a reference for it]
%[possibly mention MORSE and choosing VREP over it]
%[talk about pure python simulator built from dynamics equations]
%[VREP is used because it allows complex environments and sensors]

%should probably include somewhere what values were used for each of the 'parameters' in the simulation, i.e. mass, inertia, lengths of rotor arms, etc

The quadcopter model was validated and tested using a computer simulation environment. 
The advantage of starting with a simulation rather than going straight to physical hardware is that it is quicker and easier to prototype new control algorithms, and it is much less costly to do so. 

Three robot simulators were considered: Virtual Robotics Experimentation Platform (V-REP)~\cite{vrep}, Modular Open Robotics Simulator Engine (MORSE)~\cite{echeverria2011modular}, and Gazebo~\cite{koenig2004design}. All three simulators were experimented with during the beginning of this work and a qualitative summary of the relative strengths of each approach is shown in \autoref{table:robot_simulators}.

%TODO fix up alignment of this table, and possibly put the description text into the actual body
\begin{table}
\begin{threeparttable}
\caption{Robotic Simulator Candidates} \label{table:robot_simulators}
%\begin{center}
\begin{tabularx}{\textwidth}{| X | l | l | l |}
\hline
\textbf{Feature} & \textbf{V-REP} & \textbf{MORSE} & \textbf{Gazebo} \\ \hline
Python Support & **** & *** & * \\ \hline
Ease of Model Creation & *** & **** & ** \\ \hline
Quality of Available Quadcopter Models & *** & ** & **** \\ \hline
Timing Control & **** & * & ** \\ \hline
Interactivity with Simulation & **** & ** & ** \\ \hline
Community Support & **** & **** & **** \\ \hline
\end{tabularx}
\begin{tablenotes}
\footnotesize
\item The number of stars indicates the extent to which the simulator demonstrates a particular feature.
Descriptions for each feature are given below.
\textit{Python Support}: the extent to which the simulator supports control from an external Python script.
\textit{Ease of Model Creation}: how easy it is to build a virtual environment and add sensors and actuators to robots in this environment.
\textit{Quality of Available Quadcopter Models}: how realistic the publicly available quadcopter models are.
\textit{Timing Control}: how much control the programmer has on the physics simulation steps and synchronization with Nengo.
\textit{Interactivity with Simulation}: degree to which the simulation can be interacted with once it is built and running.
\textit{Community Support}: available online resources to get help with using the simulator.
\end{tablenotes}
%\end{center}
\end{threeparttable}
\end{table}

For this project the Virtual Robotics Experimentation Platform (V-REP) was chosen as the physical simulation environment.
This open-source software package is free to use for educational purposes; 
contains models of various robotic platforms, including a model of a quadcopter; 
and allows the construction of intricate 3D virtual environments for the quadcopter to interact with, as well as many virtual sensors with which to equip the quadcopter, such as cameras.


%[possible table comparing V-REP/MORSE/Gazebo with pros and cons of each]

The simulation environment in V-REP is set up to be a large open space with small obstacles and walls along the ground. 
The quadcopter model was initialized to start in the center of the space, 0.5 meters above the ground. 
This model is the one provided with the V-REP software courtesy of Eric Rohmer and Lyall Randell. 
The quadcopter's target is a small semi-transparent green sphere. The goal of the control system is to make the quadcopter move to the target's location with the target's orientation. 
For this thesis, an environment was created that includes various wind tunnels: zones of space that exert an external force on the quadcopter in a particular direction while the quadcopter is within the zone. 
There are also zones that can exert various nonlinear forces on the quadcopter. 
For example, one zone could push the quadcopter in the $z$ direction with a force proportional to the square of its horizontal velocity. 
In addition to these zones, the quadcopter can also pick up and drop off boxes of various mass. 
These wind zones and boxes are used to test how well the controller can adapt to changes in its dynamics.
An example of a region within the simulation environment is shown in \autoref{fig:vrep_screenshot}.

\begin{figure}
\centering
\includegraphics[height=0.3\textheight]{./figures/VREPScreenshot.png}
\caption{Simulation Environment in V-REP}
\label{fig:vrep_screenshot}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{
Screen capture of an example environment in V-REP. The coloured areas with arrows represent regions of space where a wind force is applied to the quadcopter in the direction of the arrows. The blue region resembling a circuit board represents an area where an unknown non-linear force will be applied to the quadcopter. The semi-transparent green sphere is the target location. The quadcopter has reached its target in this example.}
\end{figure}

\chapter{Control} \label{chap:control}

\section{PID Control}

%[go over standard control, talk about PD and PID systems]
%[put in a block diagram of a simple PID system -> maybe show spots where disturbances can enter]
%[talk about applying PD and PID control to a quadcopter]
%[mention limits of PID control that adaptive control is trying to solve]

The canonical method for building a control system is to use a PID controller. 
This is a closed loop controller that works by applying gains to an error signal and feeding the result as input to the plant (system being controlled). 
PID stands for Proportional, Integral, and Derivative, referring to the three types of gains used. 
One gain is applied directly to the error (proportional), a second gain is applied to the derivative of the error, and a third gain is applied to the integral of the error. 
Each of these gains serves a unique purpose, and the careful tuning of all three is required to get the desired performance of the controller. 
The proportional gain is the main driver of the system and responds directly to the error to bring the system towards its target point. 
The larger the gain, the faster this response will be.
However, using only this gain will not account for any inertia in the system and will often cause the system being controlled to overshoot its target, especially when the gain is large. 
In some cases, the overshoot can be so large that the system becomes unstable and oscillates out of control.
Thus, the derivative gain is usually used in addition, because it is sensitive to the rate of change of the error, and attempts to bring this rate towards zero. 
The faster the system is moving towards its target state, the more this gain acts to slow it down, by effectively providing a form of damping and so reducing the amount of overshoot.
Depending on how this parameter is tuned, and the properties of the system at hand, it can remove overshooting entirely. 
Due to unmodelled disturbances or changing external inputs, there may be a steady-state error in the system, wherein the controller has reached a stable state, yet there is still an error being measured. 
The integral term is designed to account for this steady-state error by keeping a running sum of the error over time. 
Eventually, this integral error should be large enough that it will drive the system to close the steady-state error gap.

\section{Adaptive Control} \label{sec:adaptive_control}

%[name a few other adaptive control methods and give references]
%[go over the slotine adaptive control stuff, throw in a bunch of references]
%[maybe throw in a bunch of the equations and derivations I did on paper]
%[mention getting the slotine stuff working on the python simulator]
%[talk about how Nengo can be used to do adaptive control with learning]

Sometimes the kinematics and dynamics of the system being controlled are unknown, or change over time in an unknown fashion. 
A controller that works well for the system initially may not be well suited when the system undergoes changes. 
In this situation, it is useful to have a controller that can adapt to these changes.

The foundation of adaptive control is based on parameter estimation. 
First, a mathematical model of the system to be controlled is generated based on physical laws. 
This model is typically of the form shown in \eqref{eq:physical_equation}, where $q$ is the vector of state variables, $M(q)$ is a mass/inertia matrix, $C(q,\dot{q})$ is the coriolis-drag term, $g(q)$ is the gravitational force, and $\tau$ is a vector representing the input force/torque to the system.

\begin{equation} \label{eq:physical_equation}
M(q)\ddot{q} + C(q,\dot{q})\dot{q} + g(q) = \tau
\end{equation}

%TODO
%[[[[[a lot more stuff to go here, need to understand it more first]]]]]
The goal of the controller is to bring the system to a particular target state.
Typically, both the state and its derivative are to be controlled.
Thus, the second derivative of the state will be zero when the system has arrived at the target state.
Setting $\ddot{q}$ to be zero gives the relationship of the inputs to the rest of the state shown in \eqref{eq:input_equation}.

\begin{equation} \label{eq:input_equation}
\tau = C(q,\dot{q})\dot{q} + g(q)
\end{equation}

An estimate of $q$ and $\dot{q}$ can typically be measured, leaving the only potentially unknown quantities in the right-hand side of the equation to be the physical parameters of the system.
If these physical parameters are constant and linear with respect to the system state, the equation can be reorganized as in \eqref{eq:Y_equation}.
Here $\theta$ is a vector of constant system parameters and $Y(q,\dot{q})$ is a known matrix dependent on the system state. 
$\tau$ is the input required to keep the system in a steady state.

\begin{equation} \label{eq:Y_equation}
\tau = C(q,\dot{q})\dot{q} + g(q) = Y(q,\dot{q})\theta
\end{equation}

Often the system parameters are not fully known and an estimate needs to be used instead.
Many control applications also require the system to be able to transition to different states, rather than remain at a particular state.
The adaptive control law in \eqref{eq:Y_and_control_law} uses an estimate of the parameter vector, $\hat{\theta}$, along with a standard control law to compute the desired input to the system. Here $e$ is the state error and $K$ is a gain matrix.
More detail on this style of adaptive control law can be found in \cite{slotine1987adaptive, slotine1991applied, cheah2006adaptive}.
%TODO EDIT1 add references to Slotine papers as well as a couple quick comments about the strengths of those controllers (guaranteed to converge with sufficient exploration of space, etc)

\begin{equation} \label{eq:Y_and_control_law}
\tau = Y(q,\dot{q})\hat{\theta} + Ke
\end{equation}

The parameter estimates can be initialized to any stable value and are updated according to the relationship in \eqref{eq:Y_update_equation}.
$L$ is a learning rate parameter that determines how quickly the parameter estimates change over time in proportion to the measured error.
The parameter estimates will eventually converge on values that allow the system to be controlled with minimal error.
Given sufficient exploration of the state space, the parameter estimates are guaranteed to converge to the real values if the real values are required for optimal control \cite{slotine1987adaptive}.

\begin{equation} \label{eq:Y_update_equation}
\dot{\hat{\theta}} = LY(q,\dot{q})^{T}Ke
\end{equation}


Creating a mathematical model of a system with enough detail to account for everything is difficult. 
Assumptions and approximations must be made if the model is to be tractable. 
Moreover, external forces from the environment may influence the model, and their form may be unknown as the environment can be largely unknown. 
One way to overcome this problem is to use a set of basis functions as the model, and the weights applied to each element of the basis as the constant parameters. 
If the basis is designed such that it can represent any computable function to a reasonable degree of accuracy, it will be effective in the adaptive control problem. 
Gaussian basis functions are commonly used in adaptive control \cite{sanner1992gaussian}, but neural networks may be used as well \cite{barto1983neuronlike}.
This thesis explores the application of this form of control law using basis functions that are biologically plausible spiking neurons.

\section{Neural Simulation}

%[brief description of the NEF and Nengo and how it can be used, mostly just put references here]
%put a reference to Travis' thesis as an example of how Nengo's learning can be used for motor control

% Maybe this section shouldn't be here at all, and just have a brief description at the start of the next section?

%TODO go through and edit this section later for those common grammar things

The Neural Engineering Framework (NEF)\cite{eliasmith2004neural} provides a means of representing arbitrary vectors using the properties of neurons as a basis.
This is done through a nonlinear encoding mechanism carried out by the tuning curves of the neurons, and a weighted linear decoding of the responses of the neurons to retrieve an approximation of the vector being encoded.
A transformation can be applied to the underlying representation by specifying different weights on the linear decoding.
Any computable function can be approximated through a transform, and the degree of accuracy of the decoding is dependent on the number of neurons used and the complexity of the function.
%TODO put some simple example here about x and x**2?
%TODO talk about dynamics here, might need to fix wording, and talk about recurrent connections?
The neurons themselves are a part of a dynamical system where timing effects and filters across connections play a role in the behaviour of the system. 
%It also specifies a means of performing transformation
For more detail on the NEF, see \cite{eliasmith2007build, stewart2011neural, eliasmith2013build}. % put this line in if you think the reader might want more details.

Simulation of biological neurons is carried out by the software package Nengo \cite{bekolay2013nengo}.
This software implements the algorithms in the NEF and provides an easy to use Python interface for building complex models under this framework.
The core components of Nengo are networks, nodes, ensembles, and connections.
A network is a container for all of the components; it can contain any number of nodes, ensembles, connections, and even other networks.
There is always one base network from which the rest of the model is built.
Ensembles are groups of neurons representing a single vector. 
The dimensionality of this vector can be any positive integer.
Nodes are used when a particular part of the network is doing a computation without using neurons as the underlying representation.
Typically nodes are used as the inputs and outputs to a neural system.
Connections specify transformations between representational components (ensembles and nodes) through one-way links where the information flows from the output of the first representational component (origin) to the input of the second representational component (termination).
Connections may have a synapse model applied to them, where the information from one end of the connection is delayed by a time-step before reaching the other end and a filter with a particular time constant may be applied.
If no synaptic filter is applied, the value from the origin of the connection is sent directly to the termination of the connection during the same time step.

Nengo supports a variety of underlying neuron models for its Ensembles.
The most commonly used is the Leaky Integrate-and-Fire (LIF) neuron \cite{burkitt2006review}.
%[add a brief description?]. %TODO add this description
Ensembles can also be run in direct mode, in which functions are computed explicitly rather than with neurons.
However, models in this mode are not implementable directly on neuromorphic hardware, and the behaviour of the system can be significantly different.
Nevertheless, it can be useful for constructing working prototypes in simulation before converting the entire system to an underlying neural model. %TODO: this sounds gross, fix it... %TODO EDIT1

Once the network structure has been specified, Nengo can build and run a simulation of this network for either a specified duration of time, or until a stop signal is generated. 
All timing is measured as `simulated time' with a specific time-step. 
If the time-step is short, the simulation can capture minute timing details more accurately, but the system as a whole will run more slowly relative to real time. A default time-step of 1\,ms is typically used in Nengo models, which provides a reasonable trade-off between accuracy and run-time.
%TODO is there any justification for this?

\section{Adaptive Control in Nengo}

%[Brief overview of Nengo (can be made longer with subsections if need be)]
%[Talk about the PES rule, how it relates to the slotine stuff]
%[talk about how there is no need to tune the KI term with the PES rule, this adaptive I term is easier to use]

In this thesis, we use the adaptive control methods described above to build a quadcopter controller using Nengo. 
An ensemble of simulated neurons is used as the set of basis functions for the physical model, and the decoders of these neurons are used as the vector of unknown constant parameters. 
A biologically plausible learning rule known as the Prescribed Error Sensitivity (PES) rule is used to update the decoder values \cite{bekolay2013simultaneous}. 

This learning method works by first creating a connection from an ensemble of spiking neurons representing the state of the system to an ensemble or node representing the output of the controller. 
This is known as the `learned connection' and can be initially set to perform any transformation, but is typically initialized so that the output is random or zero. 
If the designer has an approximation of what the final learned transformation should look like, they can set this as the initial transformation. 
Doing so will allow the system to converge to the final transformation more quickly.

The learned connection will be modulated by an error signal, which can come from anywhere in the network. 
The PES learning rule will attempt to reduce the error signal by changing the value of the decoders on the learned connection. 
The direction in which the decoder values change is dependent on the sign of the error. 
The magnitude of the change in decoder values at each time step is dependent on both the magnitude of the error and a learning-rate parameter. 
The learning rate is a dimensionless parameter that needs to be tuned for the specific application. 
It is dependent on the simulation time step, the number of neurons in the state ensemble, as well as how responsive the model needs to be to changes. 
%TODO [[check to make sure this is true!!]] 
A larger learning rate will cause larger reactions to error, effectively making the system trust its current measurements more than historical ones. 
A smaller time-step means that these changes will occur more frequently, so the net change over time will be greater. 
A larger number of neurons means that the changes will be greater, as there will be more decoders changing. The overall transformation is a sum of these decoders. 
%TODO [[check to make sure this is true!!]].
%[[talk about differing dimensions in these populations?]]

%TODO TODO TODO should this example go here???
%A simple example of using the PES learning rule to control an inverted pendulum is shown in Figure XXX for clarification before employing the rule the more high dimensional case of a quadcopter controller. [[describe how the model works in the figure description, talk about the different dimensionalities of each population]]

%[put in a simple network diagram showing the PES rule, and possibly an equation with its math]


% Should a chapter on simulation go here??
%======================================================================
%\chapter{Simulation} \label{chap:simulation}
%======================================================================

%======================================================================
\chapter{Design} \label{chap:implementation}
%======================================================================

%TODO should the pure math adaptive controller be talked about here? Maybe put this as a subsection in adaptive control? This might make more sense to be after the basic controller model is introduced, but before all of the iterations
\section{Derivation of Non-Neural Adaptive Controller} \label{chap:nonneuraladaptive} %TODO pick a better title
%this section will be heavy on math, and go through how the adaptive python model was formed (from the equations on all of those sheets of paper)

Based on the dynamics equations in \autoref{sec:dynamics} and the adaptive control theory presented in \autoref{sec:adaptive_control}, an adaptive controller for a quadcopter can be generated using the methods described in \cite{cheah2006adaptive}.
The first step is to set up the governing equation for the system \eqref{eq:starting_equation}, and rearrange it so that the right side is in terms of the second derivative of the state \eqref{eq:rearranged_starting_equation}.


\begin{equation} \label{eq:starting_equation}
M(q)\ddot{q} + C(q,\dot{q})\dot{q} + g(q) = B(q)u
\end{equation}

\begin{equation} \label{eq:rearranged_starting_equation}
\ddot{q} = M(q)^{-1}(-C(q,\dot{q})\dot{q} - g(q) + B(q)u)
\end{equation}

We can then choose the relationship in \eqref{eq:internal_starting_equation} which allows $\ddot{q}$ to be zero, given the values of the terms in \eqref{eq:C} and \eqref{eq:g}.

\begin{equation} \label{eq:internal_starting_equation}
B(q)u = C(q,\dot{q})\dot{q} + g(q)
\end{equation}

\begin{equation} \label{eq:C}
C(q,\dot{q}) = 
\begin{bmatrix}
A_{x}|\dot{x}| & 0 & 0 & 0 & 0 & 0 \\
0 & A_{y}|\dot{y}| & 0 & 0 & 0 & 0 \\
0 & 0 & A_{z}|\dot{z}| & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & I_{zz}\dot{\psi} & -I_{yy}\dot{\theta} \\
0 & 0 & 0 & -I_{zz}\dot{\psi} & 0 & I_{xx}\dot{\phi} \\
0 & 0 & 0 & I_{yy}\dot{\theta} & -I_{xx}\dot{\phi} & 0 \\
\end{bmatrix}
\end{equation}

\begin{equation} \label{eq:g}
g(q) =
\begin{bmatrix}
0 \\
0 \\
mg \\
0 \\
0 \\
0 \\
\end{bmatrix}
\end{equation}

Here, $u$ is a vector containing the thrust force and the rotational torque in the body frame, $B(q)$ is the transformation matrix of the force and torque from the body frame to the inertial frame, and $\omega$ is a vector containing the squared angular velocities of each rotor.
Consequently, we can write:

\begin{equation} \label{eq:u}
u =
\begin{bmatrix}
u_{1} \\
u_{2} \\
u_{3} \\
u_{4} \\
\end{bmatrix}
=
A\omega
=
\begin{bmatrix}
k & k & k & k \\
0 & -lk & 0 & lk \\
-lk & 0 & lk & 0 \\
-b & b & -b & b \\
\end{bmatrix}
\begin{bmatrix}
\omega_{1}^{2} \\
\omega_{2}^{2} \\
\omega_{3}^{2} \\
\omega_{4}^{2} \\
\end{bmatrix}
\end{equation}

\begin{equation} \label{eq:b}
B(q) =
\begin{bmatrix}
\cos{\phi}\sin{\theta}\cos{\psi} + \sin{\theta}\sin{\psi} & 0 & 0 & 0 \\
\cos{\phi}\sin{\theta}\sin{\psi} - \sin{\theta}\cos{\psi} & 0 & 0 & 0 \\
\cos{\phi}\cos{\theta} & 0 & 0 & 0 \\
0 & 1 & 0 & 0 \\
0 & 0 & 1 & 0 \\
0 & 0 & 0 & 1 \\
\end{bmatrix}
\end{equation}

Expanding \eqref{eq:internal_starting_equation} and rearranging the equation in terms of $\omega$ results in \eqref{eq:Y}.

%TODO find a good letter name for w->u
\begin{equation} \label{eq:Y} 
\omega = Y(q,\dot{q})\hat{\theta} = A^{-1}B(q)^{-1}[C(q,\dot{q})\dot{q} + g(q)]
\end{equation}

The form of $Y(q,\dot{q})$ can be determined as in \eqref{eq:Y_expanded} by pulling out a vector of the unknown constant parameters. The true values of these parameters, assuming the model is perfect, are shown in \eqref{eq:parameters}.

%TODO may need to rearrange this equation to reflect the rotor orientation
\begin{equation} \label{eq:Y_expanded} 
Y(q,\dot{q})\hat{\theta} =
\begin{bmatrix}
a|\dot{x}|\dot{x} & b|\dot{y}|\dot{y} & c|\dot{z}|\dot{z} & c & 0 & -\dot{\phi}\dot{\psi} & -\dot{\phi}\dot{\theta} \\
a|\dot{x}|\dot{x} & b|\dot{y}|\dot{y} & c|\dot{z}|\dot{z} & c & -\dot{\theta}\dot{\psi} & 0 & \dot{\phi}\dot{\theta} \\
a|\dot{x}|\dot{x} & b|\dot{y}|\dot{y} & c|\dot{z}|\dot{z} & c & 0 & \dot{\phi}\dot{\psi} & -\dot{\phi}\dot{\theta} \\
a|\dot{x}|\dot{x} & b|\dot{y}|\dot{y} & c|\dot{z}|\dot{z} & c & \dot{\theta}\dot{\psi} & 0 & \dot{\phi}\dot{\theta} \\
\end{bmatrix}
\begin{bmatrix}
\hat{\theta_{1}} \\
\hat{\theta_{2}} \\
\hat{\theta_{3}} \\
\hat{\theta_{4}} \\
\hat{\theta_{5}} \\
\hat{\theta_{6}} \\
\hat{\theta_{7}} \\
\end{bmatrix}
\end{equation}

\begin{subequations} \label{eq:abc_definitions}
\begin{equation}
d = (\cos{\phi}\sin{\theta}\cos{\psi} + \sin{\theta}\sin{\psi})^{2} + (\cos{\phi}\sin{\theta}\sin{\psi} - \sin{\theta}\cos{\psi})^{2} + (\cos{\phi}\cos{\theta})^{2}
\end{equation}
\begin{equation}
a = (\cos{\phi}\sin{\theta}\cos{\psi} + \sin{\theta}\sin{\psi}) / d
\end{equation}
\begin{equation}
b = (\cos{\phi}\sin{\theta}\sin{\psi} - \sin{\theta}\cos{\psi}) / d
\end{equation}
\begin{equation}
c = (\cos{\phi}\cos{\theta}) / d
\end{equation}
\end{subequations}

%TODO may need to change this slightly to reflect the rotor orientation
\begin{equation} \label{eq:parameters} 
\theta =
\begin{bmatrix}
A_{x}/4k \\
A_{y}/4k \\
A_{z}/4k \\
mg/4k \\
(I_{zz}-I_{yy})/2kl \\
(I_{xx}-I_{zz})/2kl \\
(I_{yy}-I_{xx})/2kl \\
\end{bmatrix}
\end{equation}

The adaptive controller and parameter update equations are shown in \eqref{eq:adaptive_control_equation} and \eqref{eq:adaptive_update_equation} respectively.
$K$ is the control gain matrix \eqref{eq:gain_matrix}, $T_{R}$ is the transformation from task space to rotor space \eqref{eq:rotor_transform}, $e$ is the state error, and $L$ is the learning rate.
The value of $L$ can either be a scalar constant or a $7 \times 7$ matrix, matching the seven entries of $\hat{\theta}$.
For simplicity a constant value of one is used.

%TODO need to mention and reference what K and e are in the text
\begin{equation} \label{eq:adaptive_control_equation}
\omega = Y(q,\dot{q})\hat{\theta} + T_{R}Ke
\end{equation}

\begin{equation} \label{eq:adaptive_update_equation}
\dot{\hat{\theta}} = LY^{T}T_{R}Ke
\end{equation}


\section{Controller Implementation}

%[show one model of the controller, the original one, talk about how it works]
%[something about how it is a target controller, rather than path]
%[maybe put some bit explaining the rationale behind the gains? Stuff like what increasing and decreasing specific gains does (might be better to just have this in the PID control section above)]

The goal of the controller is to actuate the quadcopter in such a way that it travels to a desired position in a reasonable amount of time. 
This position is specified as a set of $x$, $y$, and $z$ coordinates and a particular yaw direction. 
The controller is given the state error of the quadcopter and its target, as well as the current linear and angular velocities of the quadcopter. 
It needs to use this information to generate a suitable control signal.

The adaptive controller here consists of two parts. The first is a standard PD controller that generates a four-dimensional output signal in the space of possible quadcopter motions (task space). 
The gain matrix that performs this operation can be seen in \eqref{eq:gain_matrix}. 
This signal is then multiplied by the matrix in \eqref{eq:rotor_transform} to transform it into the four-dimensional rotor velocity space, providing the actuation associated with the desired movement commands.
The design of this matrix depends upon the orientation of the rotor blades to the $x$ and $y$ axes. 
This transformation matrix assumes that the rotor axes are offset from the $x$ and $y$ axes by 45 degrees, as in the V-REP model used here.

\begin{equation} \label{eq:gain_matrix}
K =
\setcounter{MaxMatrixCols}{12}
\begin{bmatrix}
0 & 0 & k_{2} & 0 & 0 & -k_{4} & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & k_{1} & 0 & 0 & -k_{3} & 0 & -k_{5} & 0 & 0 & k_{7} & 0 & 0 \\
-k_{1} & 0 & 0 & k_{3} & 0 & 0 & 0 & -k_{5} & 0 & 0 & k_{7} & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & -k_{6} & 0 & 0 & k_{8}
\end{bmatrix}
\end{equation}

\begin{equation} \label{eq:rotor_transform}
T_{R} = 
\begin{bmatrix}
1 & -1 & 1 & 1 \\
1 & -1 & -1 & -1 \\
1 & 1 & -1 & 1 \\
1 & 1 & 1 & -1
\end{bmatrix}
\end{equation}

\begin{equation} \label{eq:control_equation}
u = 
\begin{bmatrix}
\omega_{1}^{2} \\
\omega_{2}^{2} \\
\omega_{3}^{2} \\
\omega_{4}^{2}
\end{bmatrix}
= T_{R}K
\begin{bmatrix}
x \\
y \\
z \\
\dot{x} \\
\dot{y} \\
\dot{z} \\
\phi \\
\theta \\
\psi \\
\dot{\phi} \\
\dot{\theta} \\
\dot{\psi} \\
\end{bmatrix}
\end{equation}
%TODO make this equation look prettier on paper. There also might need to be tildes and things on the variables to show that they are state error.

Translation in the $x$ and $y$ directions is dependent on the states of both $x$ and $y$ as well as roll and pitch, because a roll in the quadcopter causes a component of thrust to be applied in the $y$ direction, and a pitch causes a component of thrust to be applied in the $x$ direction.
A delicate balance needs to be found between each of the gains in order to create a stable, functioning controller. 
The setpoint for each of the velocities as well as for roll and pitch is zero.

%[gain matrix figures]

%[talk about egocentric state as the error]
The state error is measured relative to the body frame because most sensors on a real quadcopter would return measurements relative to the sensor device itself, which is located on the quadcopter. Absolute measurements could still be obtained with a GPS device, but would typically be much less accurate and such devices are harder to use for fine-tuned control. Using localized sensors, the state of the quadcopter can be defined relative to its target, so that the state is expressed in the same coordinates as the error.

%[talk about building the model in Nengo]
Here the controller is implemented with a Nengo network, in which a 12-dimensional ensemble representing the state error can be projected to a 4-dimensional ensemble representing the desired control command. 
The transformation applied through this projection is the $4 \times 12$ PD gain matrix of the controller \eqref{eq:gain_matrix}. 
This 4-dimensional ensemble is then projected to another 4-dimensional ensemble which represents the four desired angular velocities of the quadcopter's rotors. 
This projection is done through a transformation by the 4x4 rotor matrix \eqref{eq:rotor_transform}. 
This rotor velocity ensemble is connected to a node representing the physical quadcopter, which in turn feeds back into the state error ensemble. 
The network diagram is shown in \autoref{fig:NetBasic}. 
The network can be simplified further by multiplying the gain matrix with the rotor matrix to give a single transformation matrix from state error to rotor velocity. 
This simplified and functionally equivalent network is shown in \autoref{fig:NetSimplified}.
The larger network is used in the remainder of this thesis because it is more explicit regarding how each signal is used, and allows greater flexibility for design improvements and system debugging.

%[Figure of simple control model without adaptation]
%[Also figure of simplified model without task space representation]
\begin{figure}
\centering
\includegraphics[height=0.3\textheight]{./figures/QuadcopterNetworkSimpleLegend.pdf} %TODO get better quality figure
\caption{Basic Quadcopter Controller Network}
\label{fig:NetBasic}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{
%Light gray rounded rectangles represent ensembles running in direct mode, dark grey rectangles represent transformations applied to connections, and the blue rounded rectangle represents a node that interacts with the V-REP simulator.
}
\end{figure}

\begin{figure}
\centering
\includegraphics[height=0.20\textheight]{./figures/QuadcopterNetworkSimplifiedLegend.pdf} %TODO get better quality figure
\caption{Simplified Quadcopter Controller Network}
\label{fig:NetSimplified}
\end{figure}

\section{Iterations}

%[show different iterations of the controller, explain strengths and weaknesses]
%[have diagrams of each, similar to the powerpoint presentation]
%[talk about Hyperopt parameter tuning]

The first version of the controller uses an adaptive ensemble to influence the task space command. 
A projection from the adaptive ensemble to the task ensemble is modulated by the state error undergoing the control transform. 
The PES learning rule is applied to this connection, which seeks to build a transformation that minimizes the error coming in from this modulatory connection. 
The effect is similar to that of the I term in a PID controller, except that it uses all state information to come up with the integral gain, and can perform nonlinear transforms to accomplish this. 
Without this adaptive component, the quadcopter will have a large steady-state error in the $z$ direction, and will fall to the ground as soon as the simulation starts.

The adaptive ensemble is the only component of the network that takes advantage of a neural representation, so that ensemble is constructed using spiking LIF neurons. The other ensembles are run in direct mode and store their exact mathematical values.
The network diagram is shown in \autoref{fig:NetControlTransform}.
%TODO put in explanation in text about what the colours mean

\begin{figure}
\centering
\includegraphics[height=0.3\textheight]{./figures/QuadcopterNetworkSimpleAdaptiveLegend.pdf} %TODO get better quality figure
\caption{Quadcopter Controller Network with Adaptation}
\label{fig:NetControlTransform}

\end{figure}

This model is very effective at learning the unknown mass of the quadcopter and adapting to any external forces in the $z$ direction. 
Gains were initially chosen manually by surveying other quadcopter models and trying gains until reasonable results were found. %[reference v-rep quadcopter, and a few other places?]
The starting gains were based on those present in the quadcopter PD controller provided with V-REP \cite{vrep}.
%[[put in some plots showing adaptation speed at simulation start, talk about parameters that change this speed -> maybe show all gains used with the plot?]]

When an external force is applied in the horizontal plane, this controller corrects for it poorly. 
The quadcopter will settle to a stable point in space with a constant offset from the target location, as can be seen in \autoref{fig:PlotSimpleA}. 
This behaviour results from how the error is specified:
since both position and angle gains are being combined into the same dimension to produce the task space error, multiple pairs of measurements will produce the same error. 
That is, the adaptive ensemble is being given an effective error reading of zero even though the quadcopter is not at the desired position. 
For example, if wind is producing a force in the $x$ direction, in order for the quadcopter to remain stationary, it must have a pitch angle that allows its thrust to compensate for both the gravitational force and the translational force.

\begin{figure}
\centering
\begin{subfigure}[t]{0.48\textwidth}
\includegraphics[height=0.40\textheight]{./figures/SimpleWindX.png} %TODO get better quality figure
\caption{Displacement by Wind}
\label{fig:PlotSimpleA}
\end{subfigure}
\begin{subfigure}[t]{0.48\textwidth}
\includegraphics[height=0.40\textheight]{./figures/SimpleWindY.png}
\caption{Trajectory to Target}
\label{fig:PlotSimpleB}
\end{subfigure}
\caption{Performance of Original Adaptive Controller}
\label{fig:PlotSimple}
\end{figure}

%[[show FBD of quadcopter with pitch angle and forces balancing]]

Since the quadcopter has a non-zero pitch, the control signal produced by this pitch multiplied by its gain will be non-zero. 
In order to have zero error, the product of the $x$ position error and its gain must match this value. 
The quadcopter will consequently move away from the target $x$ position in order to maintain zero control error, an undesirable side-effect of this controller design. 
In fact, there is a whole space of angle-position measurement pairs that produce zero error.
A visualization of this space is depicted in \autoref{fig:ZeroErrorSurface}. 
The reason this controller works so well under normal operation despite this flaw is that only one point in that space is ever stable at any particular time. 
When the only force acting on the quadcopter is gravity, that point is at the target location with roll and pitch angles of zero radians. 
As external forces are applied along the horizontal plane, this stable point shifts to new locations.

%[[show diagram with the cone of 0 error states]]
\begin{figure}
\centering
\includegraphics[height=0.3\textheight]{./figures/ZeroErrorSurface.png} %TODO get better quality figure
\caption{Zero Control Signal Region}
\label{fig:ZeroErrorSurface}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{
This surface indicates the region of the quadcopter state where the control output is zero.
The target position is at [0,0] and the angle shown is a combination of the pitch and roll angles where $\mathit{angle} = \sqrt{\phi^{2} + \theta^{2}}$.
The quadcopter is only ever stable at one point on this surface.
The location of that point is dependent on the external forces acting on the quadcopter.
Under only the influence of gravity, the stable point is at the center of [0,0], but when an external force is applied along the horizontal plane, the stable point shifts in the direction of that force by an amount proportional to the magnitude of that force.
The slope of this surface is the ratio of the linear and angular P gains of the controller, $k_{1}/k_{5}$.}
\end{figure}


One way to overcome the above problem is to modify the error signal that the adaptive ensemble uses. 
The only important state variables that need to match the target are the $x$, $y$, $z$ position and yaw angle. 
The state velocities should all be zero as well. 
Since roll and pitch are not supposed to be controlled, they should really be left out of the error that the adaptive ensemble uses since they do not correspond to an actual error in the desired state. 
This information is still important in allowing the quadcopter to fly properly, as the controller needs to know roll and pitch information for stable flight. If this information is removed from the controller entirely, it cannot fly.

Consequently, the first iteration of the original controller omits the roll and pitch information only from the adaptive ensemble, while still using it for the baseline controller. 
This approach involves generating a separate gain matrix to use on the modulatory connection to the learned transformation. 
The new network diagram of the controller is shown below in \autoref{fig:NetAdaptiveTransform}. 
The form of the gain matrix for the modulatory connection is shown in \eqref{eq:adaptive_gain_matrix}.

\begin{equation} \label{eq:adaptive_gain_matrix}
K_{a} =
\setcounter{MaxMatrixCols}{12}
\begin{bmatrix}
0 & 0 & k_{a2} & 0 & 0 & -k_{a4} & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & k_{a1} & 0 & 0 & -k_{a3} & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
-k_{a1} & 0 & 0 & k_{a3} & 0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & -k_{6} & 0 & 0 & k_{8}
\end{bmatrix}
\end{equation}
%TODO maybe the roll/pitch velocities should still be kept here

%[[show network diagram with adaptive transform]]
\begin{figure}
\centering
\includegraphics[height=0.3\textheight]{./figures/QuadcopterNetworkModifiedAdaptiveLegend.pdf} %TODO get better quality figure
\caption{Quadcopter Controller Network with Adaptive Transformation}
\label{fig:NetAdaptiveTransform}
\end{figure}


\subsection{Gain Tuning}

This controller is now able to adapt to external horizontal forces, but normal flight is much less stable and prone to overshooting targets. 
This is possibly because the gains need to be re-tuned to work with this new controller setup.
However, as there are 12 different gains in this controller, tuning them manually is a difficult and laborious task, one well-suited for automated parameter optimization, as described next.

The tool that I chose to use for this optimization is Hyperopt \cite{bergstra2013hyperopt}. 
Hyperopt is a Python package designed to perform parameter optimization over a search space for a given function.
As long as a problem can be set as a function that takes any number of parameters and returns a single error metric to be minimized, that problem can be used with Hyperopt. 
%TODO
Hyperopt uses a technique known as Sequential Model-Based Optimization (SMBO) for function optimization.
SMBO methods are typically used when the goal is to optimize a function that is costly to evaluate as they invest more time between function evaluations than other methods, such as conjugate gradient descent, in order to reduce the overall number of evaluations \cite{mockus1978application, bergstra2013hyperopt}.
Since each set of gains to evaluate requires a controller model to be generated and physics simulation to be run for a fixed amount of time (on the order of seconds), the evaluations are costly to compute time-wise, leading to SMBO as the preferred choice of optimization method.
%Hyperopt works by constructing a surrogate model 
%[[blather on about how hyperopt builds a surrogate model based on the problem, and how it has the option for smarter than random methods for picking points, as well as interesting ways to set up the search space]]

%TODO better title here??
\subsection{Using Hyperopt}

In order to use Hyperopt, the controller model must be encapsulated in a function that returns a useful metric of how well the controller works.
This was done by creating a set of target points for the quadcopter to move through over a period of time and an additional ensemble computing a scalar measure of the error of the quadcopter from the target. 
This error metric was taken to be a weighted Euclidean norm of each of the dimensions of the state.
States that were deemed more important (such as the $x$, $y$, and $z$ position) were given higher weights in this calculation.
States that were less important (such as roll and pitch) were given lower weights. 
Paired with this error was a status signal of whether the quadcopter should be at the target at a given time or en-route.
As the quadcopter cannot be expected to move instantaneously between targets, it should not be penalized for having an error when it has just been told to move to a different location. 
This status signal is set to 0 right after a new target is introduced, and then set to 1 again about the time when the quadcopter is expected to have reached the new target. The error at each time step is multiplied by this status signal before being recorded. 
The delay in switching back on the status signal can be used to indicate how important it is for the quadcopter to reach the target quickly. 
A longer delay means the optimization will find parameters that allow the quadcopter to reach the target very precisely with little oscillation, but reaching the target could take a long time. 
A shorter delay will prefer controllers that get to the target quickly, but may overshoot, have a steady state error, or jitter once they reach the target.

Model creation, running the physics simulator, sending the target commands, stopping the physics simulator, and returning the error metric were all encapsulated in a single Python function. 
This function was then given to Hyperopt, which ran it many times and kept track of the parameters used for the best result. 
There were 3000 evaluations of different parameter sets, and each run took about 30 seconds to complete. 
Hyperopt is able to go through a set number of evaluation points in one run, and then pick up where it left off in a separate run.
This capability allowed these runs to be completed overnight over the course of multiple nights to get the final results, shown in the Hyperopt column of \autoref{table:hyperopt_gains}. 
These are by no means the optimal gain parameters, as the function being optimized does not represent the full operational space of the controller, and the method for generating the error metric was very simplistic. 
The more complex the set of targets in the objective function, the longer each run will take to complete, meaning less of the parameter space can be explored in the same amount of time. 
A tradeoff had to be made between the amount of data to evaluate and the quality of each data point. 
Nevertheless, these gains turned out to produce a controller that works exceptionally well and is faster, more responsive, and more accurate than the previous controller.
The performance is further improved by augmenting the gains found through Hyperopt with hand-tuned gains.
These tweaks to the gains were done empirically to produce stronger performance in particular areas of flight. %TODO might need to expand on this / make it make more sense
These final gains are shown in the last column of \autoref{table:hyperopt_gains}.

%[[show a diagram or table of the target points used in the hyperopt objective function?]]

\begin{table}
\caption{Gain Values Obtained Through Hyperopt} \label{table:hyperopt_gains}
\begin{center}
\begin{tabular}{| l | l | l | l |}

\hline
\textbf{Gain} & \textbf{Original} & \textbf{Hyperopt} & \textbf{Hybrid} \\ \hline
$k_{1}$ & 0.22 & 0.4335 & 0.4335 \\ \hline
$k_{2}$ & 2.00 & 3.8617 & 8.0000 \\ \hline
$k_{3}$ & 0.47 & 0.5388 & 0.5388 \\ \hline
$k_{4}$ & 1.65 & 4.6702 & 6.6000 \\ \hline
$k_{5}$ & 3.80 & 2.5995 & 2.5995 \\ \hline 
$k_{6}$ & 10.0 & 0.8029 & 6.4230 \\ \hline 
$k_{7}$ & 1.95 & 0.5990 & 0.5990 \\ \hline 
$k_{8}$ & 3.16 & 2.8897 & 11.5589 \\ \hline 
$k_{a1}$ & 0.0063 & 0.0262 & 0.0262 \\ \hline 
$k_{a2}$ & 10.0 & 48.2387 & 26.00 \\ \hline 
$k_{a3}$ & 0.0150 & 0.0276 & 0.0276 \\ \hline 
$k_{a4}$ & 8.25 & 34.9626 & 21.45 \\ \hline 


\end{tabular}
\end{center}
\end{table}


%[talk about adaptive gains not needing to match regular ones, and go into how hyperparameter optimization is useful here. Talk about building an objective function to test parameter settings, and how it is not perfect and can be improved]


\subsection{Improving Adaptation to Horizontal Forces}

There is still much room for improvement of this controller. 
While it can adapt to horizontal forces, it does so quite slowly, as can be seen in \autoref{fig:PlotStandard}. 
This is because the adaptive component of the controller is always fighting against the command given by the standard component. 
There is an implicit target roll and pitch angle of zero radians that the controller is trying to achieve even if that is not the correct angle to be at. 
The second iteration of the controller overcomes this problem by employing a second adaptive ensemble that tries to learn what roll and pitch angles will allow the quadcopter to be stationary, and then feeds these angles to the basic controller.
Thus, the controller will no longer be actively trying to bring the quadcopter away from its setpoint, yet still has the angle information required for it to be able to fly. 
A network diagram of this controller is shown in \autoref{fig:NetAngleAdapt} and the performance of this controller is shown in \autoref{fig:PlotAngleAdapt}.
As can be seen, control in the $x$ direction is significantly improved, with only slightly slower control in the $y$ direction.

\begin{figure}
\centering
\begin{subfigure}[t]{0.48\textwidth}
\includegraphics[height=0.40\textheight]{./figures/StandardWindX.png} %TODO get better quality figure
\caption{Displacement by Wind}
\end{subfigure}
\begin{subfigure}[t]{0.48\textwidth}
\includegraphics[height=0.40\textheight]{./figures/StandardWindY.png}
\caption{Trajectory to Target}
\end{subfigure}
\caption{Performance of Basic Adaptive Controller}
\label{fig:PlotStandard}
\end{figure}

\begin{figure}
\centering
\includegraphics[height=0.3\textheight]{./figures/QuadcopterNetworkAdaptiveAngleLegend.pdf} %TODO get better quality figure
\caption{Quadcopter Controller Network with Angle Correction}
\label{fig:NetAngleAdapt}
\end{figure}

%[[show plots of angle adapt controller]]
\begin{figure}
\centering
\begin{subfigure}[t]{0.48\textwidth}
\includegraphics[height=0.40\textheight]{./figures/AngleCorrectionWindX.png} %TODO get better quality figure
\caption{Displacement by Wind}
\label{fig:PlotAngleAdaptA}
\end{subfigure}
\begin{subfigure}[t]{0.48\textwidth}
\includegraphics[height=0.40\textheight]{./figures/AngleCorrectionWindY.png}
\caption{Trajectory to Target}
\label{fig:PlotAngleAdaptB}
\end{subfigure}
\caption{Performance of Angle Correction Controller}
\label{fig:PlotAngleAdapt}
\end{figure}

%TODO figure out a better title for this section
\subsection{Shortcomings of the Error Signal}

Even with the above adaptation, a problem remains with the implementation of this controller. 
Since the adaptive component is driven by state error, as soon as the target is moved to a new location, a large error will be produced. 
This error will cause the adaptive transformation to change, even if it was already at its optimal configuration. 
This change can cause problems in control, such as the overshoot and oscillations seen in \autoref{fig:PlotAngleAdaptB}. 
The summation of the control signal of the underlying controller and the signal from the adaptation to the error creates a control signal that is too large for the desired performance.

To correct this behaviour, the third iteration of the controller being developed here includes a time-delayed filter that is first applied to an ensemble representing the target location. 
This filtered location is then subtracted from an unfiltered location and stored in a new ensemble. 
The signal coming from this new ensemble can be used to inhibit the adaptive ensembles. 
If the target has not moved recently, the value being projected from this filtered-difference ensemble will be close to zero, meaning there is no inhibition. 
If the target is suddenly moved, this filtered-difference ensemble will begin to output the difference between the two target positions, thereby inhibiting the adaptation in any dimension that has a difference. 
Over time the filtered target will start to match the unfiltered target, and the extent to which the adaptive ensembles can adapt to any errors increases. 
The network diagram for this model can be seen in \autoref{fig:NetTMC} and the performance of this controller is shown in \autoref{fig:PlotTMC}.
In this case the filtered-difference ensemble feeds into just the angle correction system.

\begin{figure}
\centering
\includegraphics[height=0.4\textheight]{./figures/QuadcopterNetworkAngleTMCLegend.pdf} %TODO get better quality figure
\caption{Quadcopter Controller Network with Target Modulated Control}
\label{fig:NetTMC}
\end{figure}

\begin{figure}
\centering
\begin{subfigure}[t]{0.48\textwidth}
\includegraphics[height=0.40\textheight]{./figures/AlloBothTMCWindX.png} %TODO get better quality figure
\caption{Displacement by Wind}
\end{subfigure}
\begin{subfigure}[t]{0.48\textwidth}
\includegraphics[height=0.40\textheight]{./figures/AlloBothTMCWindY.png}
\caption{Trajectory to Target}
\end{subfigure}
\caption{Performance of Filtered Angle Correction Controller}
\label{fig:PlotTMC}
\end{figure}

This controller has the best performance of those tested both for operation under normal conditions and in the presence of unknown external forces.

%TODO talk about the allocentric version too
\subsection{Additional State Information}

The state information represented by the adaptive ensemble does not have to be the same as the state information fed into the controller.
This allows the adaptation to take advantage of any sensory information available.
For example, a low battery could affect how the quadcopter flies or the accuracy of the sensor measurements. If the adaptive controller is aware of the battery state, it could learn to adapt its control signal to account for the kinds of dynamic changes that occur in this regime.
Similarly, if there is a sensor that can detect weather information (strong winds, rain, snow, etc.), this sensor can be used to provide context for the controller to learn how to best adapt to these situations.
In the controllers described in the previous sections, the error with respect to the target is the only context used as this was all that was being measured.
One useful piece of additional information available in the simulation is absolute position.
If there are dynamic effects that vary with the position of the quadcopter, such as strong winds in a particular region, the controller will be able to learn the association of the dynamic effects with a particular location in order to adapt faster to those effects when the quadcopter enters that location.
The final iteration of the controller design here has the network diagram shown in \autoref{fig:NetTMCA}.
It is similar to the previous iteration, but the target modulation is applied to both adaptive populations and additional allocentric information is contained in the state and adaptation ensembles. 
This allocentric information is not passed on through the control transform.

% This figure will be the same as the non-allocentric version, just with less dimensions in the state, so probably don't need a new figure for it
%\begin{figure}
%\centering
%\includegraphics[height=0.3\textheight]{./figures/NetAlloTMC.png} %TODO get better quality figure
%\caption{Quadcopter Controller Network with Allocentric Target Modulated Control}
%\label{fig:NetAlloTMC}
%\end{figure}

\begin{figure}
\centering
\includegraphics[height=0.4\textheight]{./figures/QuadcopterNetworkTMCALegend.pdf} %TODO get better quality figure
\caption{Quadcopter Controller Network with Allocentric Target Modulated Control}
\label{fig:NetTMCA}
\end{figure}

%TODO this section may need a new name
\section{Software Simulation}

%[talk about how Nengo is connected to V-REP?]
%[Have a diagram about information flow?]
%[talk about the remote Api, setting flags in V-REP lua scripts, setting signals, sending plotting data. Talk about the run_model script and specific target patterns (or put that under experiments?)]

Each of these controllers is implemented as a Nengo model in Python. 
The main communication channel between V-REP and Nengo is the Quadcopter Node. 
This node contains a callable Python class that manages the communication between the two systems at every time step and, from the Nengo network's point of view, represents the entirety of the physics simulation. 
Connections can be made to and from this node just like any other node in a Nengo network. 
A connection to the V-REP remote API server is established when this node is created. 
This node outputs the 6-dimensional state of the quadcopter, the 6-dimensional derivative of the state of the quadcopter, as well as the 6-dimensional state of the target.
These values are obtained by making a remote API call to read these values from the current state of the simulation. The input to the Quadcopter Node is a 4-dimensional signal representing the velocity commands to give to each of the four rotors.
These commands are packed into a string signal and sent to the V-REP simulation. 
A Lua script is run within V-REP each time step, and this script unpacks these velocity commands. 
It then issues them to the physical quadcopter model. The physics engine calculates the appropriate forces and torques that will be applied to the quadcopter as well as the resulting changes in state (position, velocity, orientation, and angular velocity) after one time-step. 
This new state will now be read by the Nengo script in its next time-step.

Nengo is run with a time-step of 1ms, as this is the standard time-step for most models and is sufficient for modelling spike timing effects in ensembles of LIF neurons while still running at a reasonable speed on most computer processors. 
The V-REP simulation on the other hand is run at a 10ms time-step. 
Ideally it would also be run at 1ms, but the simulator has trouble rendering and making calculations that quickly. 
To account for this, Nengo only communicates with V-REP every 10 of its time-steps. 
A synchronization trigger command is also issued every 10 time-steps from Nengo. 
The V-REP simulation can move forward only after a trigger is issued, and only by one time-step. 
Nengo pauses simulation until V-REP has completed this time-step. 
This sequence ensures that the timing of each simulation is always synchronized. 
Some processing overhead is incurred in ensuring the synchronization of these two systems, but the improved accuracy of the overall simulation justifies this expense. 

%======================================================================
\chapter{Simulations and Results} \label{chap:analysis}
%======================================================================

\section{Experiments}

%[Run different models in different situations, PD, PID, Adaptive, (possibly different adaptive models)]
%[have some measure of goodness to run against -> measure of error throughout a run, -> speed of adaptation to picking up boxes or wind tunnel]
%[also put in an adaptive controller that is either allocentric or context sensitive, see if it does better on those measures]
%[lots of graphs and pretty pictures, maybe even a screenshot or two]

To get a sense of how well the neural adaptive controller performs, some reference implementations were created to use as benchmarks. 
Five different non-neural controllers were used: a standard PD controller, a standard PID controller, an improved PID controller, an improved PID controller with a faster integral gain, and an adaptive controller. 
%TODO [fix this sentence up, or just put them all in a table with a description].
% doublecheck that these are the ones you actually want to use, maybe narrow it down to only recording data for a few of them
Several iterations of the neural adaptive controller are used in the benchmarking: the basic neural adaptive controller, the angle corrective controller, the target modulated controller (TMC), and the target modulated controller with allocentric information (TMCA). %[just stick these in a table with descriptions as well] %TODO fix and re-work this sentence
The controllers used in the benchmarking are listed in \autoref{table:controller_models}.

%[[show the math behind each of the controllers, mention the gravity compensation term used to account for the mass of the quadcopter, and how it is not needed in the adaptive models or PIDt. Have block diagrams or nengo diagrams of each]]

For each non-adaptive reference implementation, a gravity compensation term had to be calculated and applied to the controllers in order to obtain reasonable performance. 
The magnitude of this term was determined empirically and is proportional to the mass of the quadcopter. 
The rotor velocity signal is the sum of the gravity compensation term and the output of the controller.
The adaptive controllers do not need this extra term as they are able to learn how to compensate for the effects of gravity very quickly.

\begin{table}
\caption{Controller Models} \label{table:controller_models}
\begin{center}
\begin{tabular}{| l | p{9cm} |}

\hline
\textbf{Controller Name} & \textbf{Description} \\ \hline
PD & Standard PD controller with gravity compensation term \\ \hline
PID & Standard PID controller with gravity compensation term \\ \hline
PIDt & PID controller where the error signal for the I term is in task space rather than state space \\ \hline
PIDtf & PIDt controller with a greater integral gain \\ \hline
Non-Neural Adaptive &  Adaptive controller without using neurons. Derivation shown in \autoref{chap:nonneuraladaptive} \\ \hline %TODO should this even be here? run some tests with it if you have time
%TODO put in single transform controller?
Neural Adaptive & Simple adaptive neural controller. The network diagram can be seen in \autoref{fig:NetAdaptiveTransform} \\ \hline
Neural Adaptive Angle Correction & Adaptive neural controller with an additional adaptive neural ensemble for determining stable roll and pitch. The network diagram can be seen in \autoref{fig:NetAngleAdapt} \\ \hline
Neural Adaptive TMC & The difference between the current target state and a time delayed target modulates the error signal that drives the learning. The network diagram can be seen in \autoref{fig:NetTMC} \\ \hline
Neural Adaptive TMCA & The same as above, but absolute position and orientation information is projected into the adaptive ensembles along with the relative error to the target. The network diagram can be seen in \autoref{fig:NetTMCA} \\ \hline

\end{tabular}
\end{center}
\end{table}

% TODO: this subsection should really come before the first experiments. May need to reword it so it makes sense to go there
\subsection{Metrics}

Three metrics are used to evaluate performance on these tasks. 
The first is the Root Mean Squared (RMS) error of the difference between the quadcopter's current state and its target state.
The quadcopter's state consists of position, velocity, orientation, and angular velocity.
These state variables are combined by computing the length of the resulting 12-dimensional error vector to produce a single quantity.
This value is calculated at each time-step for the duration of the run and then averaged over the time-steps in the run.
For point to point control this error is sometimes not the most informative because as soon as the target point has changed a large error value will be recorded even if the quadcopter is moving optimally towards its target.

The second metric is a modified version of the RMS error designed to take the desired trajectory into account.
This works by ignoring any error along the direction to the target as long as the current velocity is also in that direction.
It also ignores any error caused by the velocity in the correct direction as long as the quadcopter is not currently at its target.
There are also weights placed on specific types of errors to reflect an increased desire to minimize those errors.
For example, errors caused by overshooting the target are weighted more heavily.

The third metric is the time taken for the quadcopter to reach its target within a particular tolerance. 
This metric favours controllers that can reach the target quickly.
This metric does not worry about overshoot and non-optimal trajectories as long as steady state is achieved at the target in the end.
The particular tolerance chosen for these experiments is maintaining an RMS error of less than 0.001 for a duration of one second.

The majority of the benchmarks performed in this thesis will report results using the trajectory RMS error and the time-to-target metric.
The RMS metrics were recorded over a 30 second simulation time.
The time-to-target trials were also run for a total of 30 seconds and a value of 30 is recorded if the quadcopter never reaches its target within tolerance over that duration.

% TODO: maybe get a better title for this
\subsection{Benchmarks for Simple Environments}

A series of simple point-to-point control tasks were used to give an indication of performance:  movement in the vertical direction, movement in the horizontal direction, rotation about the yaw axis, and horizontal movement into a wind tunnel. %, and a movement that requires a change in x, y, and z position as well as yaw angle. 
These tasks are summarized in \autoref{table:tasks} below.
Each task with translational motion was completed across four different target distances (every meter from 1m to 4m) and the task with rotational motion was completed across three different target angles (every 45 degrees from 45{\degree} to 135{\degree}).
Experimental results (mean and standard error) obtained from the reference controllers and the neural adaptive controllers are shown for each of these tasks in \Cref{fig:bar_horizontal,fig:bar_vertical,fig:bar_rotational,fig:bar_wind}.
%TODO find a way to colour the table with the description instead
The legend for all of the bar plots in the remainder of this thesis is shown in \autoref{fig:bar_legend}.
%\autoref{fig:bar_horizontal}, \autoref{fig:bar_vertical},\autoref{fig:bar_rotational}, \autoref{fig:bar_wind}.
 
%Experimental results obtained from the reference controllers and the neural adaptive controllers are shown in \autoref{fig:benchmark_simple_reference} and \autoref{fig:benchmark_simple_neural} respectively. %TODO possibly put these into one figure, and make it easier to read - also need to make the scaling clear to compare neural to benchmark. Possibly have another figure with just the best neural vs the best reference
%TODO try using a bar chart instead, which will include all of them in one chart for the task

%[[Show table with different movement types, along with brief descriptions (i.e. move 2 units in the y direction into wind tunnel with 1 Newton of force in the x direction)]]
%[[maybe have multiple distances for each, and then average the RMS error, and report that for each model on the same graph? Should also have at least one plot from a single run, but its probably easiest to do massive comparisons with a single RMS number]]

%TODO fix up this table
\begin{table}
\caption{Benchmark Tasks} \label{table:tasks}
\begin{center}
\begin{tabular}{| l | p{9cm} |}

\hline
\textbf{Task} & \textbf{Description} \\ \hline
Vertical & Fly upwards (between one and four meters) \\ \hline
Horizontal & Fly in the $x$ direction (between one and four meters) \\ \hline
Rotation & Rotate about the yaw axis (between 45 and 135 degrees) \\ \hline
Wind & Fly in the $y$ direction (between one and four meters). Enter a wind tunnel after 0.75 meters. This wind tunnel exerts a force of 0.6 N in the $x$ direction \\ \hline

\end{tabular}
\end{center}
\end{table}

\begin{figure}
\centering
\includegraphics[width=0.45\textwidth]{./figures/ControllerLegend.png}
\caption{Controllers used in Benchmarking}\label{fig:bar_legend}
\end{figure}

\begin{figure}
\centering
\includegraphics[width=0.45\textwidth]{./figures/BarHorizontalPRMS30.png}
\includegraphics[width=0.45\textwidth]{./figures/BarHorizontalTime30.png}
\caption{Performance on Horizontal Movement Tasks}\label{fig:bar_horizontal}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{}
\end{figure}

\begin{figure}
\centering
\includegraphics[width=0.45\textwidth]{./figures/BarVerticalPRMS30.png}
\includegraphics[width=0.45\textwidth]{./figures/BarVerticalTime30.png}
\caption{Performance on Vertical Movement Tasks}\label{fig:bar_vertical}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{
The PID controller fared quite poorly on this task because it would overshoot the target on every trial. The trajectory RMS error is truncated in the plot to allow the performance of the other controllers to be easily seen.
}
\end{figure}

\begin{figure}
\centering
\includegraphics[width=0.45\textwidth]{./figures/BarRotationalPRMS30.png}
\includegraphics[width=0.45\textwidth]{./figures/BarRotationalTime30.png}
\caption{Performance on Rotational Movement Tasks}\label{fig:bar_rotational}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{}
\end{figure}

\begin{figure}
\centering
\includegraphics[width=0.45\textwidth]{./figures/BarWindPRMS30.png}
\includegraphics[width=0.45\textwidth]{./figures/BarWindTime30.png}
\caption{Performance on Horizontal Movement Through Wind Tasks}\label{fig:bar_wind}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{
Four of the controllers tested were not able to reach the target within the 30 second time limit. A value of 30 seconds is reported in these cases. This was due to these controllers being unable to compensate for the steady-state error in the horizontal direction caused by the wind.
}
\end{figure}

%[put some stuff in here with noise as well?]

%[also do experiments moving throughout a weird force field, and seeing improvement the more times it does it. This will have to be the allocentric model]

Overall, the performance of all of the controllers tested is quite similar on the horizontal, vertical, and rotational tasks.
On the task that involves movement in the presence of wind, the iterations of the neural adaptive controller fare very well.
The PID controller with a fast integral gain also does well on this task, so it is not clear from these tests whether an adaptive controller has an advantage over a well-tuned PID controller.

The performance of the neural adaptive controller is quite strong, but its performance is still relatively close to that of the modified PID controllers. 
Where the neural adaptive controller really excels is when there are external forces being applied that are functions of the system state. 
The neural adaptive controller is designed to be able to account for both linear and nonlinear functions of the system state.

% TODO: figure out a good name for this section
\subsection{Benchmarks for Complex Environments}

To quantify the performance of the quadcopter controllers under the influence of these more interesting forces, a new set of benchmark tasks is used.
These tasks are listed in \autoref{table:forcingfunctions} below. %maybe just reference the forcing function table here? Have both places reference the same table

%put table with strange forces here, include downward force proportional to horizontal velocity, sideways force proportional to position/velocity, etc.
\begin{table}
\caption{Forcing Functions} \label{table:forcingfunctions}
\begin{center}
\begin{tabular}{| l | p{9cm} |}
\hline
\textbf{Name} & \textbf{Description} \\ \hline
None & No external forces applied \\ \hline
Constant & A constant horizontal force is applied in a direction perpendicular to the quadcopter's desired trajectory\\ \hline
Vertical Velocity & A downward force is applied proportional to the quadcopter's horizontal velocity \\ \hline
Horizontal Position & A force is applied in the $y$ direction proportional to the quadcopter's $x$ position \\ \hline
Horizontal Velocity & A force is applied in the $y$ direction proportional to the quadcopter's $x$ velocity \\ \hline
\end{tabular}
\end{center}
\end{table}

Performance of the neural adaptive controllers is compared to the reference controllers on these tasks.
The results are displayed in \autoref{fig:benchmark_complex}.
The tasks that involved external forces in the horizontal direction tended to be the most difficult for the controllers tested here, likely due to the fact that these forces are more complicated to correct for.
In these cases the final iteration of the neural adaptive controller (TMCA) obtained a relatively low RMS error and was the fastest to reach the target in most trials.
In the horizontal velocity case this controller was not the fastest to reach its target, but this can be attributed to the fact that once the target is close and the quadcopter velocity is low, there will be little external force (as it is a function of velocity) in this case. 
A controller that does not adapt at all or adapts slowly will be able to reach steady state more quickly, explaining why the PD controller reached the target the fastest.
The non-neural adaptive controller also did well here, which could be caused by a low learning rate.
Performance on the task that involved a vertical force was almost the same as if there was no force at all. 

\begin{figure}
\centering
%TODO change the ordering of tasks on the figures
\includegraphics[width=1\textwidth]{./figures/BarForcesPRMS30.png}
\includegraphics[width=1\textwidth]{./figures/BarForcesTime30.png}

\caption{Performance on Benchmarks under different External Force Conditions}
\label{fig:benchmark_complex}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{

}
\end{figure}

%Fill the rest of this subsection with the results of running models in these environments
% could just put things in tables instead of having plots everywhere, or make the plots smaller so they don't take up an obnoxious amount of space

% this is where the loop figures will be. Talk about the controller getting better over time by learning the space
\subsection{Improvement over Time} %TODO better title for this section

The previous experiments only track performance for a single short run. 
The neural adaptive controllers are able to learn how to move throughout their environment better over time.
An example of this ability is shown in \autoref{fig:loop_path}.
The quadcopter is commanded to move back and forth between two points on the $x$-$y$ plane ([0,0] and [3,0]) with a 5 second delay between each new command.
An external force is being applied to the quadcopter along the $y$ direction proportional to its $x$ direction velocity.
This causes the path of the quadcopter to become curved rather than a straight line.
Over time, the neural adaptive TMCA controller begins to compensate for this external force, and the path between the two points starts to converge towards a straight line.
The non-adaptive controllers show no such improvement over time.

%TODO include numeric RMS values to accompany the plots
\begin{figure}
\centering
\includegraphics[width=0.32\textwidth]{./figures/allo_both_tmc_loop.png}
\includegraphics[width=0.32\textwidth]{./figures/pd_loop.png}
\includegraphics[width=0.32\textwidth]{./figures/pidtf_loop.png}
\caption{Path of Quadcopter between Two Points with External Forces} %mention something about how the experiment was done here? it was 100 seconds total with 5 seconds before the point switches
\label{fig:loop_path}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{
RMS values for the Neural Adaptive TMCA, PD, and PIDtf controllers are 0.6729, 1.2827, and 1.7662 respectively. These values were calculated using the deviation from the line formed by the two target points as the error.}
\end{figure}


%% This subsection will talk about how pre-training for different amounts of time on particular force functions effects the performance 
%%TODO need to fix this up a bunch or remove it
%\subsection{Effects of Training Time}
%
%Adaptive controllers learn over time.
%Longer operation contributes to a better internal representation of the environment which in turn increases the performance that the controller can achieve.
%As such, it useful to characterize by how much these controllers improve their performance as a function of time.
%
%Training is done by running the controller in a particular environment for a period of time and recording the values of the learned decoders. % maybe explain this more??
%These decoder values can then be loaded into the controller at the start of a new simulation, and the new controller will effectively start off with all of the information that the old controller had learned.
%By taking snapshots of the decoder values at particular intervals throughout the training run, the new simulations can be run with different levels of pre-training to compare performance.
%Each training run is completed with a different forcing function being applied to the quadcopter.
%The particular forcing functions used in these experiments are detailed in \autoref{table:forcingfunctions}.
%In order to explore the state space, the quadcopter is sent to a series of random target locations throughout the training run.
%In the experiments shown below, 15 random targets were chosen during training.
%These targets are sampled from a 6 by 6 area in the horizontal plane.
%Every three seconds the quadcopter is commanded to move to the next target in the sequence. Once the end of the sequence has been reached, the sequence will start over again until the end of the training session. %TODO this is out of date, future runs have everything random, with no repeating of the sequence
%Training sessions lasted 1000 seconds, with recordings taken every 10 seconds. %the recordings should really be on a more exponential scale, i.e. [0,5,10,50,100,500,1000]
%
%
%% could also add and try out forces dependent on the vertical direction, or even rotation
%
%These trained controllers ran on the benchmark tasks from Section XXX.
%The performance for different lengths of training time are shown in \autoref{fig:training_time} below.
%
%%put the figure with the different forcing functions and training time here
%\begin{figure} \label{fig:training_time}
%
%\end{figure}
%
%These results show a clear improvement in the controller performance after training.
%The amount of training needed to get good results is relatively small. 
%After about XXX seconds, the controller performance does not improve significantly.
%
%%maybe show another figure with training done on more than 15 points? Highlight how exploring a large portion of the space is important


% does this even need to be a section? Results go hand in hand with experiments
\section{Results}

%[Talk about the results, give advantages and disadvantages of each]

Several ranking methods are presented in order to determine which controller had the best overall performance.
The first assigns a numerical ranking to each controller for each metric (RMS error and time-to-target) on each of the seven tasks and computes the sum of those rankings.
A rank of 1 is given to the controller with the best performance, a rank of 2 to the second best, a rank of 3 to the third best, and a rank of 4 to all others.
As can be seen in \autoref{fig:task_ranking}, the neural adaptive TMCA controller obtains the best overall rank.

\begin{figure}
\centering
\includegraphics[height=0.30\textheight]{./figures/task_ranking30.png}

\caption{Controller Performance Ranking}
\label{fig:task_ranking}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{Final rank is obtained from the sum of the controller's rank on each task. Lower ranks indicate better performance. There are seven tasks with two performance metrics each, resulting in fourteen individual rankings in total.}
\end{figure}

This ranking method can be useful, but it does not take into account how close together or far apart the results are.
One approach to overcoming this issue is to compute the mean across all tasks and use that as the ranking.
Directly using the mean is sometimes not informative, as the results of each task can be on very different scales.
By normalizing the data before taking the mean, the different scales become less of an issue.
These results are shown in \autoref{table:mean_task_ranking}.
The neural adaptive TMCA controller performs the best in all categories except for the normalized RMS error, in which it is tied with the neural adaptive TMC controller.

\begin{table}
\begin{threeparttable}
\caption{Mean Controller Performance} \label{table:mean_task_ranking}
\begin{tabular}{| l | l | l | l | l |}
\hline
\textbf{Controller Name} & \multicolumn{2}{l|}{\textbf{Time-to-Target}} & \multicolumn{2}{l|}{\textbf{Trajectory RMS}} \\ \cline{2-5}
& \textbf{Direct} & \textbf{Normalized} & \textbf{Direct} & \textbf{Normalized} \\ \hline
%Basic Neural Adaptive & 6.58 & 0.41 & 7.19 & 0.25 \\ \hline
%Neural Adaptive Angle Correction & 6.84 & 0.43 & 9.03 & 0.31 \\ \hline
%Neural Adaptive TMC & 6.13 & 0.38 & 3.87 & \textbf{0.20} \\ \hline
%Neural Adaptive TMCA & \textbf{5.96} & \textbf{0.37} & \textbf{3.34} & 0.21 \\ \hline
%Non-Neural Adaptive & 6.73 & 0.42 & 10.45 & 0.30 \\ \hline
%PD & 6.07 & 0.38 & 9.15 & 0.29 \\ \hline
%PID & 7.67 & 0.48 & 9.77 & 0.36 \\ \hline
%PIDt & 6.70 & 0.41 & 7.72 & 0.26 \\ \hline
%PIDtf & 6.17 & 0.38 & 8.42 & 0.27 \\ \hline
Basic Neural Adaptive & 6.77 & 0.29 & 0.54 & 0.12 \\ \hline
Neural Adaptive Angle Correction & 5.88 & 0.27 & 0.69 & 0.26 \\ \hline
Neural Adaptive TMC & 4.36 & 0.21 & 0.13 & \textbf{0.08} \\ \hline
Neural Adaptive TMCA & \textbf{4.11} & \textbf{0.20} & \textbf{0.10} & \textbf{0.08} \\ \hline
Non-Neural Adaptive & 7.20 & 0.32 & 2.64 & 0.21 \\ \hline
PD & 6.78 & 0.29 & 1.73 & 0.21 \\ \hline
PID & 10.85 & 0.47 & 1.61 & 0.29 \\ \hline
PIDt & 6.51 & 0.28 & 0.51 & 0.12 \\ \hline
PIDtf & 4.32 & 0.21 & 0.38 & 0.16 \\ \hline
\end{tabular}
\begin{tablenotes}
\footnotesize
\item The \textit{Direct} method computes the mean across all tasks by using the mean for each task directly. The \textit{Normalized} method first normalizes the mean for each task by dividing it by the maximum mean in that task and then uses the mean of that result. Displayed values are rounded to two decimal places. The best result for each method is bolded.
\end{tablenotes}
\end{threeparttable}
\end{table}

While the final iteration of the neural adaptive controller did not perform strictly the best in all tasks, it had the strongest overall performance.
For the benchmarks that covered simple environments with no external forces, the neural controller performed on par with the reference controllers.
For the benchmarks involving external forces, the neural controller achieved the lowest RMS error and the fastest time to reach the target for the majority of the tasks.
 
%======================================================================
\chapter{Discussion and Future Work} \label{chap:discussion}
%======================================================================
% maybe split the following sections into contributions and extensions?

% talk about what this thesis provides to the community. What is new about it
\section{Contributions}

%mention adaptability to unknown environments with the potential to be run on low power neuromorphic hardware (maybe something about how Moore's law is ending, and neuromorphic hardware could be the future)
%mention integrating Nengo with V-REP to allow more interesting biological models

The main contribution of this thesis is an adaptive control system for a quadcopter using simulated biological neurons.
This is a proof-of-concept that an ensemble of spiking neurons can be used to improve quadcopter control in the face of uncertain and changing environments through a biologically realistic learning mechanism.
Using this methodology, state-of-the-art control can be implemented on the low power and highly parallel architecture of neuromorphic hardware.

A second contribution is the integration of the Nengo neural simulation software with the robotics and physics simulation capacity of V-REP.
This union allows complex neural models to be embodied in a physical environment in a straightforward manner.
The various tools developed over the course of this thesis work to support the integration of the simulated quadcopter with Nengo are general purpose enough to be applied to other simulated robotic models.
These tools include a standard method for opening and closing the communication channel for the simulators, synchronization of simulation time between the two simulators, an interface for using sensors and actuators from Nengo, and a method for displaying data from a Nengo network using V-REP's plotting interface.


%TODO talk about noise here? Throw in one simulation with noise to say that it works
%TODO can also throw in one simulation of target following and mention how it works too
%TODO possibly move this section to the previous chapter and combine with results
\section{Discussion}

While the majority of this work was conducted in an ideal environment with no sensor noise, the neural adaptive controller can still function with noisy measurements.
A few experiments were performed with Gaussian noise injected into the sensor measurements, and the results, compared to the reference PID and PIDtf controllers, are shown in \autoref{fig:noise_comparison}.
As expected, all of the controllers perform worse as the amount of noise in the system is increased.
Each controller degrades at about the same rate with respect to the increase in noise, with the adaptive controller being affected slightly more than the others.
%TODO make this make more sense
One possible explanation for this is that the adaptive controller is attempting to learn something from the errors caused by noise, but cannot do so correctly because the noise is random.
In future work, methods could be implemented to increase robustness to noise, such as smoothing measurements over time.

\begin{figure}
\centering
\includegraphics[height=0.3\textheight]{./figures/HorizontalNoise.png}
\caption{Horizontal Motion with Noise}
\label{fig:noise_comparison}
\captionsetup{singlelinecheck=off,font=footnotesize}
\caption*{
Noise level indicates the variance of the Gaussian noise applied to the state measurements. The variance of the noise applied to the angular measurements is one tenth of that applied to the linear measurements. A noise level of 0.2 means that the variance on position and velocity measurements is 0.2 and the variance on orientation and angular velocity measurements is 0.02.}
\end{figure}

Another common goal of a control system is path following.
This is where instead of the robot being instructed to move to a particular point, it is told to follow a particular path.
A series of position/velocity pairs can be sequentially given as set-points rather than a single position set-point.
The neural adaptive controller was designed for point-to-point control, but it can also be given a path to follow.
A top-view screenshot from V-REP of a simple example demonstrating this capability is shown in \autoref{fig:circular_path_ff6}.
Here the quadcopter is commanded to continuously fly along a circular path while being subjected to an external force along the $y$-axis (up/down) proportional to its velocity along the $x$-axis (left/right).
In the beginning, the quadcopter's trajectory is very elliptical due to this external force, but as it continues to fly the adaptive component of the controller learns to compensate for this force, eventually producing very circular trajectories.


\begin{figure}
\centering
\includegraphics[height=0.3\textheight]{./figures/CircularPathFF6.png}
\caption{Circular Path with External Forces}
\label{fig:circular_path_ff6}
\end{figure}


\section{Future Work}

Further research could usefully explore improvements to this model in terms of both physical accuracy and additional capabilities.
The closer the model behaves to a real physical system, the more useful it will be in guiding the design of real world applications.
The more capabilities the model displays, the wider the variety of applications to which it can be applied.
%TODO fix this last sentence up a bit

\subsection{Adaptive Prediction System}

In addition to control, the inverse problem of predicting future states is important.
An internal model of the system dynamics can be constructed by using sensor measurements over time and updating its representation based on the errors in its own predictions of future states.
If the environment changes, or the quadcopter or its sensors become damaged, the internal model can adapt to reflect those changes and give predictions for this augmented system.
Integrating adaptive control with an adaptive method for system identification could lead to major improvements in overall system performance.


\subsection{Integrated Navigation and Planning System}

A useful extension of the adaptive controller would be to embed it within a larger navigation and planning system.
Currently the quadcopter will only fly in a direct path towards a specified point, but it would be useful for it to be able to detect obstacles in its path and calculate a new route to avoid those obstacles.
The adaptive ensemble of neurons in the controller could be set up to be able to recognize certain obstacles and learn to avoid them in an effective manner.
An internal representation of the environment along with a memory for changes in the environment could be used to allow dynamic planning of optimal trajectories to a target.

% talk about modelling more complex things like blade flapping and vortex ring state
%TODO maybe these weird states could be learned? That would be cool.
\subsection{Detailed Modelling}

Under special conditions, the dynamics of a quadcopter can change dramatically from the ideal model presented in this thesis.
One such condition is known as vortex ring state, which can arise when the quadcopter descends too quickly \cite{vortexring}.
The quadcopter's rotor blades may enter the turbulent downwash of the air beneath the craft, causing a severe loss of lift.
A vortex forms in a circular ring around the rotors' path of rotation, bringing turbulent air from beneath the rotors to above them.
Increasing the throttle at this point only makes the vortex stronger, eventually causing a total loss of lift.
This is a dangerous situation to encounter, so it could be highly beneficial to include some properties of this state as well as signs of entering this state (typically wobbling of the craft during descent) in the model.
With this extra information, the adaptive controller should have an easier time learning how to recover from this state and avoid entering it entirely.
%TODO put in reference for vortex ring state

Another condition that was not modelled in simulation is blade flapping.
This condition occurs when the rotor blades undergo translational motion;
the advancing blade experiences a higher tip velocity while the retreating blade experiences a lower tip velocity \cite{bangura2012nonlinear}.
This differential results in an increase in lift in the advancing blade and a decrease in lift in the retreating blade, applying a torque to the rotor disk.
The rotor tip path flaps up as the blade advances, and down as the blade retreats to balance the aerodynamic forces.
The adaptive controller is designed to be able to account for external non-linear effects, so this would be a good condition to test the controller with in the future.

%TODO add ground effects as well? Maybe just test them in one experiment because it is quick and easy to do, and mention how more work can be done on it here

% talk about using camera for odometry, or ultrasonic/infrared for distance, etc
\subsection{Realistic Sensors}

The current implementation of the simulation is able to directly read the exact state information of the quadcopter.
A more realistic scenario would be to use a set of sensors that provide measurements that can be used to estimate the state.
Each of these sensors can have different noise properties and accuracy that can change depending on the situation.
The robustness of the adaptive control algorithm to changes in the sensor properties and estimation algorithm can be important to characterize when looking into purchasing components to implement the controller on physical hardware.


\subsection{Running on Physical Hardware}

The most practical extension to this work would be to move beyond simulation and run the control system on physical hardware.
Many challenges will arise in this undertaking, including selection of the particular hardware platform, sensors to use, and real-time requirements of the control algorithm.
Overcoming these challenges to produce an adaptive neural quadcopter that can interact with the real world will be highly beneficial both for exploring practical applications of this work and for guiding future research directions.

\section{Conclusion}

This project was undertaken to design an adaptive quadcopter controller capable of being implemented on neuromorphic hardware and evaluate its performance with respect to conventional controller design methods.
The results obtained in simulation are highly promising and warrant future investigation of more sophisticated neural navigation and planning systems as well as implementation on physical hardware.

% The \appendix statement indicates the beginning of the appendices.
\appendix

% Add a title page before the appendices and a line in the Table of Contents
\chapter*{APPENDICES}
\addcontentsline{toc}{chapter}{APPENDICES}
%======================================================================
\chapter[Link to the Source Code]{Source Code for Thesis Work}
\label{AppendixA}
% Tip 4: Example of how to get a shorter chapter title for the Table of Contents 
%======================================================================

All of the source code used to create the quadcopter controllers presented in this thesis is available for download at: \url{https://github.com/bjkomer/masters-thesis}

Instructions for downloading and installing the necessary software to run the simulations and benchmarks are also provided at this link.

%----------------------------------------------------------------------
% END MATERIAL
%----------------------------------------------------------------------

% B I B L I O G R A P H Y
% -----------------------

% The following statement selects the style to use for references.  It controls the sort order of the entries in the bibliography and also the formatting for the in-text labels.
\bibliographystyle{plain}
% This specifies the location of the file containing the bibliographic information.  
% It assumes you're using BibTeX (if not, why not?).
\cleardoublepage % This is needed if the book class is used, to place the anchor in the correct page,
                 % because the bibliography will start on its own page.
                 % Use \clearpage instead if the document class uses the "oneside" argument
\phantomsection  % With hyperref package, enables hyperlinking from the table of contents to bibliography             
% The following statement causes the title "References" to be used for the bibliography section:
\renewcommand*{\bibname}{References}

% Add the References to the Table of Contents
\addcontentsline{toc}{chapter}{\textbf{References}}

\bibliography{thesis}
% Tip 5: You can create multiple .bib files to organize your references. 
% Just list them all in the \bibliography command, separated by commas (no spaces).

% The following statement causes the specified references to be added to the bibliography even if they were not
% cited in the text. The asterisk is a wildcard that causes all entries in the bibliographic database to be included (optional).
\nocite{*}

\end{document}