MAKEGLOSSARIESFLAGS?=
LATEXFLAGS:=-file-line-error -halt-on-error -no-shell-escape
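+# Assumes DOCS, LATEX, BIBTEX, GREP and MAKEGLOSSARIES are defined earlier in
+# this Makefile.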
+FIGURES:=$(addsuffix .png,$(addprefix img/,$(notdir $(basename $(wildcard fig/*.dot)))))
+LISTINGS:=$(wildcard listings/*)
+TEXS:=$(wildcard *.tex)
+BIBS:=$(wildcard *.bib)
+
.PHONY: all clean
-.SECONDARY: $(addsuffix .fmt,$(DOCS))
+.SECONDARY: $(addsuffix .fmt,$(DOCS)) $(FIGURES)
all: $(addsuffix .pdf,$(DOCS))
%.fmt: %.pre
$(LATEX) $(LATEXFLAGS) -ini -jobname="$(basename $@)" "&$(LATEX) $<\dump"
-%.pdf: %.tex %.fmt $(wildcard *.bib) $(wildcard *.tex)
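+# Only rerun LaTeX for the final PDF when the captured log asks for it; the
+# PDF itself is produced by the %.mlog rule below.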
+%.pdf: %.mlog
+ if $(GREP) -iFq 'Rerun' $<; then $(LATEX) $(LATEXFLAGS) $(basename $<); fi
+
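+# Run LaTeX (plus BibTeX and makeglossaries when the .aux file calls for them)
+# and capture the output of the final pass so rerun requests can be detected.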
+%.mlog: %.tex %.fmt $(BIBS) $(TEXS) $(FIGURES) $(LISTINGS)
$(LATEX) $(LATEXFLAGS) $<
if $(GREP) -q '^\\bibdata{' $(basename $<).aux; then $(BIBTEX) $(BIBTEXFLAGS) $(basename $<); fi
if $(GREP) -q '\@istfilename' $(basename $<).aux; then $(MAKEGLOSSARIES) $(MAKEGLOSSARIESFLAGS) $(basename $<); fi
- $(LATEX) $(LATEXFLAGS) $< | tee $(basename $@).mlog
- $(GREP) -iFq 'Rerun' $(basename $@).mlog && $(LATEX) $(LATEXFLAGS) $< | tee $(basename $@).mlog || true
- $(RM) $(basename $@).mlog
-
-clean: $(addprefix clean-,$(DOCS))
-
-clobber: $(addprefix clobber-,$(DOCS))
+ $(LATEX) $(LATEXFLAGS) $< | tee $@
-clean-%:
- $(RM) $(addprefix $(@:clean-%=%).,acn acr alg aux bbl blg fmt glg glo gls\
- ist lof log lol lot nav out run.xml snm tdo toc vrb xdy)
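+# Render the Graphviz sources in fig/ to PNG figures in img/.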
+img/%.png: fig/%.dot
+ dot -Tpng $< > $@
-clobber-%:
- $(RM) $(@:clobber-%=%).pdf
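+# Remove all generated files, including the PDFs and rendered figures.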
+clean:
+ $(RM) $(foreach DOC,$(DOCS),$(addprefix $(DOC).,\
+ acn acr alg aux bbl blg fmt glg glo gls ist lof log lol lot nav out\
+ pdf run.xml snm tdo toc vrb xdy)) $(FIGURES)
\centering
\begin{tabular}{cll}
\toprule
- Num. & Song & Duration\\
+ Num. & Song & Duration (mm:ss.ss)\\
\midrule
\multicolumn{3}{l}{\bf Cannibal Corpse {-} A Skeletal Domain}\\
00 & High Velocity Impact Spatter & 04:06.91\\
--- /dev/null
+digraph {
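+    // Fully connected feed-forward network: 13 inputs, n hidden units and a
+    // single output node (binary classifier).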
+ rankdir=LR;
+ graph [dpi=400,ordering=out,splines=false];
+ subgraph cluster_in {
+ label="Input"
+ i1 [label=<i<SUB>1</SUB>>];
+ i2 [label=<i<SUB>2</SUB>>];
+ id [label=<…>];
+ i13 [label=<i<SUB>13</SUB>>];
+ }
+
+ subgraph cluster_hidden {
+ label="Hidden"
+ h1 [label=<h<SUB>1</SUB>>];
+ h2 [label=<h<SUB>2</SUB>>];
+ hd [label=<…>];
+ hn [label=<h<SUB>n</SUB>>];
+ }
+
+ subgraph cluster_output {
+ label="Output"
+ o;
+ }
+
+ i1 -> h1; i1 -> h2; i1 -> hd; i1 -> hn;
+ i2 -> h1; i2 -> h2; i2 -> hd; i2 -> hn;
+ id -> h1; id -> h2; id -> hd; id -> hn;
+ i13 -> h1; i13 -> h2; i13 -> hd; i13 -> hn;
+
+ h1 -> o; h2 -> o; hd -> o; hn -> o;
+}
--- /dev/null
+digraph {
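+    // Fully connected feed-forward network: 13 inputs, n hidden units and
+    // multiple output nodes (multi-class classifier).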
+ rankdir=LR;
+ graph [dpi=400,ordering=out,splines=false];
+ subgraph cluster_in {
+ label="Input"
+ i1 [label=<i<SUB>1</SUB>>];
+ i2 [label=<i<SUB>2</SUB>>];
+ id [label=<…>];
+ i13 [label=<i<SUB>13</SUB>>];
+ }
+
+ subgraph cluster_hidden {
+ label="Hidden"
+ h1 [label=<h<SUB>1</SUB>>];
+ h2 [label=<h<SUB>2</SUB>>];
+ hd [label=<…>];
+ hn [label=<h<SUB>n</SUB>>];
+ }
+
+ subgraph cluster_output {
+ label="Output"
+ o1 [label=<o<SUB>1</SUB>>];
+ o2 [label=<o<SUB>2</SUB>>];
+ od [label=<…>];
+ on [label=<o<SUB>n</SUB>>];
+ }
+
+ i1 -> h1; i1 -> h2; i1 -> hd; i1 -> hn;
+ i2 -> h1; i2 -> h2; i2 -> hd; i2 -> hn;
+ id -> h1; id -> h2; id -> hd; id -> hn;
+ i13 -> h1; i13 -> h2; i13 -> hd; i13 -> hn;
+
+ h1 -> o1; h1 -> o2; h1 -> od; h1 -> on;
+ h2 -> o1; h2 -> o2; h2 -> od; h2 -> on;
+ hd -> o1; hd -> o2; hd -> od; hd -> on;
+ hn -> o1; hn -> o2; hn -> od; hn -> on;
+}
uses pianos and synthesizers. The droning synthesizers often occupy the same
frequency range as the vocals.
+The training and test data are divided as follows:
+\begin{table}[H]
+ \centering
+  \begin{tabular}{cc}
+ \toprule
+ Singing & Instrumental\\
+ \midrule
+ 0.59 & 0.41\\
+ \bottomrule
+ \end{tabular}
+ \quad
+ \begin{tabular}{lcccc}
+ \toprule
+ Instrumental & CC & DG & WDISS\\
+ \midrule
+ \bottomrule
+ \end{tabular}
+\end{table}
+
\section{\gls{MFCC} Features}
The waveforms themselves are not well suited for use as features due to their
high dimensionality and correlation. Therefore we use the widely used
\section{\gls{ANN} Classifier}
\todo{Spectrals might be enough, no decorrelation}
-\section{Model training}
-
\section{Experiments}
+\subsection{\emph{Singing} voice detection}
+The first type of experiment conducted is \emph{Singing} voice detection. This
+is the task of segmenting an audio signal into segments that are labeled either
+as \emph{Singing} or as \emph{Instrumental}. The input of the classifier is a
+feature vector and the output is the probability that singing occurs in the
+sample.
+
+\begin{figure}[H]
+ \centering
+ \includegraphics[width=.5\textwidth]{bcann}
+ \caption{Binary classifier network architecture}\label{fig:bcann}
+\end{figure}
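+
+As a sketch rather than a description of the exact configuration, assume a
+single hidden layer with activation function $\phi$ and a sigmoid output unit.
+The network of Figure~\ref{fig:bcann} then maps a feature vector $\vec{x}$ to
+\[
+  P(\mbox{\emph{Singing}}\mid\vec{x})
+    = \sigma\bigl(\vec{w}^{T}\phi(W\vec{x}+\vec{b})+c\bigr),
+  \qquad
+  \sigma(z) = \frac{1}{1+e^{-z}},
+\]
+where $W$, $\vec{b}$, $\vec{w}$ and $c$ are the trainable weights and biases.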
+
+\subsection{\emph{Singer} voice detection}
+The second type of experiment conducted is \emph{Singer} voice detection. This
+is the task of segmenting an audio signal into segments that are labeled either
+with the name of the singer or as \emph{Instrumental}. The input of the
+classifier is a feature vector and the outputs are the probabilities for each
+of the singers plus a probability for the \emph{Instrumental} label.
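+
+Again only as a sketch under the same single-hidden-layer assumption, a softmax
+output layer produces, for $K$ singers plus the \emph{Instrumental} label,
+\[
+  P(y=k\mid\vec{x})
+    = \frac{\exp\bigl(\vec{w}_{k}^{T}\vec{h}+c_{k}\bigr)}
+           {\sum_{j=1}^{K+1}\exp\bigl(\vec{w}_{j}^{T}\vec{h}+c_{j}\bigr)},
+  \qquad
+  \vec{h} = \phi(W\vec{x}+\vec{b}),
+\]
+so that the outputs sum to one over all labels.
+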
\section{Results}