initial version exam
author Mart Lubbers <mart@martlubbers.net>
Thu, 20 Oct 2016 10:43:07 +0000 (12:43 +0200)
committer Mart Lubbers <mart@martlubbers.net>
Thu, 20 Oct 2016 10:43:07 +0000 (12:43 +0200)
exam/Makefile [new file with mode: 0644]
exam/exam.tex [new file with mode: 0644]
exam/q1.tex [new file with mode: 0644]
exam/q2.tex [new file with mode: 0644]
exam/q3.tex [new file with mode: 0644]
exam/rutitlepage.sty [new file with mode: 0644]

diff --git a/exam/Makefile b/exam/Makefile
new file mode 100644 (file)
index 0000000..b74a345
--- /dev/null
@@ -0,0 +1,25 @@
+# Build the exam PDF(s) with pdflatex.
+#
+# DOCUMENTS      basenames (no extension) of the top-level .tex files
+# PDFLATEXFLAGS  flags handed to every pdflatex invocation
+DOCUMENTS:=exam
+PDFLATEXFLAGS:=-halt-on-error
+PDFLATEX:=pdflatex $(PDFLATEXFLAGS)
+
+# Remove a target whose recipe failed so it is not mistaken for up to date.
+.DELETE_ON_ERROR:
+
+# all and clean are commands, not files.
+.PHONY: all clean
+
+all: $(addsuffix .pdf,$(DOCUMENTS))
+
+# pdflatex is run twice (second pass presumably to settle the .aux data).
+# rutitlepage.sty is a prerequisite because exam.tex loads it.
+%.pdf: %.tex q1.tex q2.tex q3.tex rutitlepage.sty
+	$(PDFLATEX) $<
+	$(PDFLATEX) $<
+
+# Expand per document so that several names in DOCUMENTS are handled correctly.
+clean:
+	$(RM) -v $(foreach doc,$(DOCUMENTS),$(addprefix $(doc),.pdf .log .aux))
diff --git a/exam/exam.tex b/exam/exam.tex
new file mode 100644 (file)
index 0000000..26a6d74
--- /dev/null
@@ -0,0 +1,29 @@
+\documentclass[titlepage,a4paper]{article}
+
+\usepackage{rutitlepage} % local package, provides \maketitleru
+\usepackage{geometry}
+\usepackage{enumitem}
+\usepackage{listings} % lstlisting environment, used in q2.tex
+
+\title{Midterm Exam}
+\author{Mart Lubbers\\s4109503}
+\date{\today}
+
+\begin{document}
+\maketitleru[%
+       course={Introduction to Language and Speech Technology},
+       authorstext={Author:}] % overrides the package default ``Authors:''
+\begin{enumerate}
+       % Question 1 (sub-answers are in q1.tex)
+       \item\input{q1.tex}
+
+       \newpage
+       % Question 2 (sub-answers are in q2.tex)
+       \item\input{q2.tex}
+
+       \newpage
+       % Question 3 (sub-answers are in q3.tex)
+       \item\input{q3.tex}
+\end{enumerate}
+               
+\end{document}
diff --git a/exam/q1.tex b/exam/q1.tex
new file mode 100644 (file)
index 0000000..a79e17f
--- /dev/null
@@ -0,0 +1,67 @@
+\begin{enumerate}
+       % Question 1a
+       \item Disfluencies are annotated by surrounding them with square
+               brackets. The first part is the \emph{reparandum}, the second part,
+               introduced by the \texttt{+}, is the \emph{editing phase} and the
+               last part is the \emph{repair}. We want to keep only the repair,
+               since that is the speech the speaker actually intended.
+
+               \verb#s/\[.*?\+\{.*?\}(.*?)\]/\1/g#
+
+               Bit by bit:
+               \begin{itemize}
+                       \item \verb#s/# Substitution.
+                       \item \verb#\[# Matches the opening square bracket. We escape this
+                               because \verb#[# is a regular expression control character and
+                               we want to match a literal.
+                       \item \verb#.*?\+# Matches non-greedily everything up to the plus
+                               sign. Thus the \emph{reparandum}. Note that the
+                               \emph{reparandum} can be empty (in case the speaker immediately
+                               starts editing). We escape the \verb#+# for the same reason as
+                               the previous segment.
+                       \item \verb#\{.*?\}# Matches everything between the curly braces.
+                               Thus the \emph{editing phase}. Note again that this match may
+                               consist of just a pair of empty curly braces, since the
+                               \emph{editing phase} can be empty.
+                       \item \verb#(.*?)# Matches non-greedily everything up to the
+                               closing square bracket and captures it in the group. Thus the
+                               \emph{repair}. Note that we do not require this group to be the
+                               exact same as the \emph{reparandum}.
+                       \item \verb#\]/# Matches the closing square bracket and we proceed
+                               to the replacement. We escape this for the same reason as
+                               before.
+                       \item \verb#\1/g# We replace the entire match with only the
+                               captured \emph{repair} group and do this globally since there
+                               can be multiple repairs in an utterance.
+               \end{itemize}
+
+       % Question 1b
+       \item \textsc{MEMM}s use features to add extra information to words.
+               \textsc{IOB} tagging is a partial parsing or chunking method that only
+               discriminates between \emph{Beginning} (\texttt{B}), \emph{Internal}
+               (\texttt{I}) and \emph{Outside} (\texttt{O}) categories.
+
+               If we use the same segmentation as before, we should mark the
+               \emph{reparandum} and \emph{editing phase} as \emph{Outside}
+               (\texttt{O}) parts, and the repair should be parsed as usual. Note that
+               a chunk can then include \texttt{O}-marked segments. For example, in ``a
+               car uh plane'' the ``car uh'' part will be tagged as \texttt{O}, ``a''
+               as \texttt{B\_NP} and ``plane'' as \texttt{I\_NP}.
+
+               For the algorithms it might be necessary to add a different tag to
+               denote internal \texttt{O} segments. This can be done by adding a
+               suffix to the \texttt{O} tag. In the previous example the text will
+               then be chunked as: \texttt{B\_NP O\_NP I\_NP}.
+
+               Concerning the \textsc{MEMM} features, obviously editing phase segments
+               should be marked as such but also the reparandum should be tagged as
+               such to not confuse it with a regular segment.
+
+       % Question 1c
+       \item Repairs are only noticed when you can look ahead to the \emph{editing
+               phase} markers. It might be necessary to either look ahead a little bit
+               or to work outwards from the identified \emph{editing phase}.
+               Right-to-left has the same problem as left-to-right in the sense that
+               it will see the repair first and also has to look ahead to know whether
+               it is part of a repair.
+\end{enumerate}
diff --git a/exam/q2.tex b/exam/q2.tex
new file mode 100644 (file)
index 0000000..ecd1781
--- /dev/null
@@ -0,0 +1,47 @@
+\begin{enumerate}
+       % Question 2a
+       \item This can be achieved by adding disfluency rules to the \textsc{CFG}.
+               This has to be done for all rules that can possibly produce
+               disfluencies. Most likely only the lowest level of rules (unit
+               productions) needs such disfluency structures. For example, if we
+               did this for the rule that transforms a \texttt{Noun} into a word, it
+               would look like this:
+
+               \begin{lstlisting}
+Noun -> TrueNoun | EditNoun TrueNoun
+TrueNoun -> flight | ...
+
+EditNoun -> TrueNoun EditWord
+EditWord -> uh | ...
+               \end{lstlisting}
+
+               With feature structures this can be generalized and made less
+               ambiguous. Features can for example force the \emph{reparandum} to be
+               of the same \texttt{CAT} as the \emph{repair}, and disfluencies might
+               have some constraints that can also be expressed with features.
+
+       % Question 2b
+       \item Standard \textsc{CKY} parsing only works for grammars in
+               \emph{Chomsky Normal Form} (\textsc{CNF}). This means that the tree
+               returned will not exactly represent the \textsc{CFG} since it possibly
+               had to be converted to \textsc{CNF}. Adapting \textsc{CKY} in a
+               fundamental way so that it correctly parses repair structures would be
+               very difficult, albeit not impossible. It basically means that, in the
+               deepest loop, you have to build in functionality that is similar to the
+               grammar that recognizes such structures and behaves accordingly. While
+               this is probably theoretically possible, it will result in a different
+               algorithm that has a hard-coded sub-grammar in itself.
+
+       % Question 2c
+       \item Similar to the previous sub-question: while it is possible to make the
+               \emph{Predictor} smarter and add disfluency structures to the chart,
+               it would change the \emph{Earley} algorithm significantly. The change
+               of the algorithm would also be very specific to certain disfluency
+               structures and would possibly make it unusable for languages that do
+               not have such structures. Note that it is easier to add this to an
+               \emph{Earley} parser than to a \emph{CKY} parser. For
+               an \emph{Earley} parser it just means hard-coding some extra grammar
+               rules in the \emph{Predictor}. For \emph{CKY} it means transforming
+               the rules to specific transformations in the table which might not be
+               trivial.
+\end{enumerate}
diff --git a/exam/q3.tex b/exam/q3.tex
new file mode 100644 (file)
index 0000000..b3061a3
--- /dev/null
@@ -0,0 +1,61 @@
+\begin{enumerate}
+       % Question 3a
+       \item In an \emph{ASR} system we can expect problems in several phases.
+
+               The first phase of an \emph{ASR} is just extracting the features. We
+               do not expect problems there since it will just produce slightly
+               different features for some parts but that is not something the feature
+               extraction cares about. It just objectively has to extract features and
+               since it is still human speech, there is no problem with disfluencies.
+
+               When trying to transform the cepstral features into a sentence several
+               components are involved. First a phone likelihood is calculated; we
+               may expect slight problems here since even the phones might be
+               reduced and the \emph{editing phase} words could just be rudimentary
+               sounds instead of phones and thus it might select suboptimal
+               likelihoods.
+
+               When decoding the phone likelihood into words a lexicon is used. This
+               lexicon might not contain the edit words and possibly also not the
+               reduced \emph{reparanda}.
+
+               Finally, during \emph{Viterbi} decoding \emph{N-Gram} models come into
+               play and, if they are not extracted from a dataset that also included
+               disfluencies, it might be the case that the probabilities of
+               disfluencies appearing are so low that it tries to fit similarly
+               sounding real words instead of the disfluency.
+
+       % Question 3b
+       \item Solving the problem of phone likelihood computation can be done by
+               shrinking the window of the feature extraction so that strongly reduced
+               phones are also correctly recognized.
+
+               Solving the second problem can be done by adding disfluency words to
+               the lexicon and also more reductive pronunciations of words.
+
+               Lastly we can increase the decoding performance by specifically
+               extracting the \emph{N-Gram} probabilities from data that also contains
+               disfluencies.
+
+       % Question 3c
+       \item To add disfluencies to speech synthesis one must know how they arise.
+               There are some word categories that have more disfluencies than others.
+               Also they may be produced to give the speaker some more time to think about
+               the rest of the sentence. When you know such properties of disfluencies
+               you can model them in the speech synthesis in the normalization phase.
+
+               In the normalization phase the system can add disfluencies at sections
+               that often produce them. This most likely is the most effective in
+               tokenisation: specific tokens can be selected to be expanded into a
+               disfluency.
+
+               Later on in the pipeline the system must also be adapted, namely in the
+               waveform synthesis. Depending on the technique applied some
+               improvements can be made. When the synthesis technique is unit
+               selection it might be helpful to have units for common disfluencies and
+               at least units for \emph{editing phase} words. It might also be helpful
+               to add units that represent a reduced pronunciation to be used in the
+               \emph{reparandum}. When \emph{diphone synthesis} is used, no big
+               changes have to be applied since most likely the diphone
+               combinations already exist in the database.
+\end{enumerate}
diff --git a/exam/rutitlepage.sty b/exam/rutitlepage.sty
new file mode 100644 (file)
index 0000000..ab0afc5
--- /dev/null
@@ -0,0 +1,58 @@
+% Radboud University Nijmegen titlepage
+% Author: Mart Lubbers
+% Date: 2016-06-28
+%
+% TODO
+% - Internationalize (dutch logos)
+% - Nice document
+% - Make CTAN ready
+\RequirePackage{graphicx,ifpdf,keyval}
+
+\makeatletter
+\define@key{maketitleru}{course}{\def\@rutitlecourse{#1}} % course name, typeset at the top of the page
+\define@key{maketitleru}{institute}{\def\@rutitleinst{#1}} % institute name, typeset below the logo
+\define@key{maketitleru}{authorstext}{\def\@rutitleauthorstext{#1}} % italic label above the author(s)
+\define@key{maketitleru}{righttext}{\def\@rutitlerighttext{#1}} % body of the right-hand column
+\define@key{maketitleru}{righttextheader}{\def\@rutitlerighttextheader{#1}} % italic header of the right-hand column
+\setkeys{maketitleru}{% defaults; the optional argument of \maketitleru overrides them
+       course={},
+       institute={Radboud University Nijmegen},
+       authorstext={Authors:},
+       righttextheader={},
+       righttext={}
+}      
+% \maketitleru[<key>=<value>,...] typesets the title page; the optional keys
+% are those declared with \define@key{maketitleru} (e.g. course, institute).
+\newcommand{\maketitleru}[1][]{% trailing % keeps the line end out of the macro
+       \setkeys{maketitleru}{#1}% apply the per-call overrides
+       \begin{titlepage}% @-names below were tokenized at definition time
+               \begin{center}
+                       \textsc{\LARGE\@rutitlecourse}\\[1.5cm]
+                       \ifpdf\includegraphics[height=150pt]{logo.pdf}\\
+                       \else\includegraphics[height=150pt]{logo.eps}\\
+                       \fi
+                       \vspace{0.4cm}
+                       \textsc{\Large\@rutitleinst}\\[1cm]
+                       \hrule
+                       \vspace{0.4cm}
+                       \textbf{\large\@title}\\[0.4cm]
+                       \hrule
+                       \vspace{2cm}
+                       \begin{minipage}[t]{0.45\textwidth}
+                               \begin{flushleft}\large
+                                       \textit{\@rutitleauthorstext}\\
+                                       \@author{}
+                               \end{flushleft}
+                       \end{minipage}
+                       \begin{minipage}[t]{0.45\textwidth}
+                               \begin{flushright}\large
+                                       \textit{\@rutitlerighttextheader}\\
+                                       \@rutitlerighttext
+                               \end{flushright}
+                       \end{minipage}
+                       \vfill
+                       {\large\@date}
+               \end{center}
+       \end{titlepage}
+}
+\makeatother