From 89388da65c845339f811e8ff89afb7a54ac0a2b2 Mon Sep 17 00:00:00 2001
From: Mart Lubbers
Date: Tue, 16 May 2017 14:47:25 +0200
Subject: [PATCH] add figures that illustrate model layout

---
Review notes (free-form text here is ignored when the patch is applied):
 * Makefile: TEXS now globs *.tex and BIBS globs *.bib; the two patterns were
   swapped. Both lists feed the same prerequisite list, so the dependency
   graph is unchanged.
 * Makefile: the img/%.png recipe creates its output directory before running
   dot. This assumes img/ is not tracked (only ignored *.png files live
   there); TODO confirm.
 * fig/*.dot: the HTML-like labels had lost their tags (`label=1>`), which is
   not valid DOT. They are restored as <x<sub>k</sub>>. The base letters
   i/h/o are taken from the node ids and are a guess; TODO confirm against
   the intended figure.
 * Not touched because it is a context line: $(MAKEGLOSSARIESFLAGSFLAGS) looks
   like a typo for $(MAKEGLOSSARIESFLAGS); worth a follow-up patch.
 * methods.tex includes the figure as {bcann} while the Makefile generates
   img/bcann.png; this relies on a graphics path defined elsewhere; verify.
 * Indentation was reconstructed with tabs; verify against the working tree.

 .gitignore     |  1 +
 Makefile       | 31 +++++++++++++++++--------------
 appendices.tex |  2 +-
 fig/bcann.dot  | 31 +++++++++++++++++++++++++++++++
 fig/mcann.dot  | 37 +++++++++++++++++++++++++++++++++++++
 methods.tex    | 40 ++++++++++++++++++++++++++++++++++++++--
 6 files changed, 125 insertions(+), 17 deletions(-)
 create mode 100644 fig/bcann.dot
 create mode 100644 fig/mcann.dot

diff --git a/.gitignore b/.gitignore
index 8df797c..16f96cf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@
 *.tdo
 *.toc
 *.xdy
+*.png
diff --git a/Makefile b/Makefile
index b5714bc..62b3d23 100644
--- a/Makefile
+++ b/Makefile
@@ -7,29 +7,32 @@ MAKEGLOSSARIES?=makeglossaries
 MAKEGLOSSARIESFLAGS?=
 LATEXFLAGS:=-file-line-error -halt-on-error -no-shell-escape
 
+FIGURES:=$(addsuffix .png,$(addprefix img/,$(notdir $(basename $(wildcard fig/*.dot)))))
+LISTINGS:=$(wildcard listings/*)
+TEXS:=$(wildcard *.tex)
+BIBS:=$(wildcard *.bib)
+
 .PHONY: all clean
-.SECONDARY: $(addsuffix .fmt,$(DOCS))
+.SECONDARY: $(addsuffix .fmt,$(DOCS)) $(FIGURES)
 
 all: $(addsuffix .pdf,$(DOCS))
 
 %.fmt: %.pre
 	$(LATEX) $(LATEXFLAGS) -ini -jobname="$(basename $@)" "&$(LATEX) $<\dump"
 
-%.pdf: %.tex %.fmt $(wildcard *.bib) $(wildcard *.tex)
+%.pdf: %.mlog
+	if $(GREP) -iFq 'Rerun' $<; then $(LATEX) $(LATEXFLAGS) $(basename $<); fi
+
+%.mlog: %.tex %.fmt $(BIBS) $(TEXS) $(FIGURES) $(LISTINGS)
 	$(LATEX) $(LATEXFLAGS) $<
 	if $(GREP) -q '^\\bibdata{' $(basename $<).aux; then $(BIBTEX) $(BIBTEXFLAGS) $(basename $<); fi
 	if $(GREP) -q '\@istfilename' $(basename $<).aux; then $(MAKEGLOSSARIES) $(MAKEGLOSSARIESFLAGSFLAGS) $(basename $<); fi
-	$(LATEX) $(LATEXFLAGS) $< | tee $(basename $@).mlog
-	$(GREP) -iFq 'Rerun' $(basename $@).mlog && $(LATEX) $(LATEXFLAGS) $< | tee $(basename $@).mlog || true
-	$(RM) $(basename $@).mlog
-
-clean: $(addprefix clean-,$(DOCS))
-
-clobber: $(addprefix clobber-,$(DOCS))
+	$(LATEX) $(LATEXFLAGS) $< | tee $@
 
-clean-%:
-	$(RM) $(addprefix $(@:clean-%=%).,acn acr alg aux bbl blg fmt glg glo gls\
-		ist lof log lol lot nav out run.xml snm tdo toc vrb xdy)
+img/%.png: fig/%.dot
+	mkdir -p $(@D) && dot -Tpng $< > $@
 
-clobber-%:
-	$(RM) $(@:clobber-%=%).pdf
+clean:
+	$(RM) $(foreach DOC,$(DOCS),$(addprefix $(DOC).,\
+		acn acr alg aux bbl blg fmt glg glo gls ist lof log lol lot nav out\
+		pdf run.xml snm tdo toc vrb xdy)) $(FIGURES)
diff --git a/appendices.tex b/appendices.tex
index 3faffc3..c5aadd9 100644
--- a/appendices.tex
+++ b/appendices.tex
@@ -3,7 +3,7 @@
 	\centering
 	\begin{tabular}{cll}
 		\toprule
-		Num. & Song & Duration\\
+		Num. & Song & Duration (mm:ss.ss)\\
 		\midrule
 		\multicolumn{3}{l}{\bf Cannibal Corpse {-} A Skeletal Domain}\\
 		00 & High Velocity Impact Spatter & 04:06.91\\
diff --git a/fig/bcann.dot b/fig/bcann.dot
new file mode 100644
index 0000000..608b740
--- /dev/null
+++ b/fig/bcann.dot
@@ -0,0 +1,31 @@
+digraph {
+	rankdir=LR;
+	graph [dpi=400,ordering=out,splines=false];
+	subgraph cluster_in {
+		label="Input"
+		i1 [label=<i<sub>1</sub>>];
+		i2 [label=<i<sub>2</sub>>];
+		id [label=<…>];
+		i13 [label=<i<sub>13</sub>>];
+	}
+
+	subgraph cluster_hidden {
+		label="Hidden"
+		h1 [label=<h<sub>1</sub>>];
+		h2 [label=<h<sub>2</sub>>];
+		hd [label=<…>];
+		hn [label=<h<sub>n</sub>>];
+	}
+
+	subgraph cluster_output {
+		label="Output"
+		o;
+	}
+
+	i1 -> h1; i1 -> h2; i1 -> hd; i1 -> hn;
+	i2 -> h1; i2 -> h2; i2 -> hd; i2 -> hn;
+	id -> h1; id -> h2; id -> hd; id -> hn;
+	i13 -> h1; i13 -> h2; i13 -> hd; i13 -> hn;
+
+	h1 -> o; h2 -> o; hd -> o; hn -> o;
+}
diff --git a/fig/mcann.dot b/fig/mcann.dot
new file mode 100644
index 0000000..495e141
--- /dev/null
+++ b/fig/mcann.dot
@@ -0,0 +1,37 @@
+digraph {
+	rankdir=LR;
+	graph [dpi=400,ordering=out,splines=false];
+	subgraph cluster_in {
+		label="Input"
+		i1 [label=<i<sub>1</sub>>];
+		i2 [label=<i<sub>2</sub>>];
+		id [label=<…>];
+		i13 [label=<i<sub>13</sub>>];
+	}
+
+	subgraph cluster_hidden {
+		label="Hidden"
+		h1 [label=<h<sub>1</sub>>];
+		h2 [label=<h<sub>2</sub>>];
+		hd [label=<…>];
+		hn [label=<h<sub>n</sub>>];
+	}
+
+	subgraph cluster_output {
+		label="Output"
+		o1 [label=<o<sub>1</sub>>];
+		o2 [label=<o<sub>2</sub>>];
+		od [label=<…>];
+		on [label=<o<sub>n</sub>>];
+	}
+
+	i1 -> h1; i1 -> h2; i1 -> hd; i1 -> hn;
+	i2 -> h1; i2 -> h2; i2 -> hd; i2 -> hn;
+	id -> h1; id -> h2; id -> hd; id -> hn;
+	i13 -> h1; i13 -> h2; i13 -> hd; i13 -> hn;
+
+	h1 -> o1; h1 -> o2; h1 -> od; h1 -> on;
+	h2 -> o1; h2 -> o2; h2 -> od; h2 -> on;
+	hd -> o1; hd -> o2; hd -> od; hd -> on;
+	hn -> o1; hn -> o2; hn -> od; hn -> on;
+}
diff --git a/methods.tex b/methods.tex
index a15c421..49f8426 100644
--- a/methods.tex
+++ b/methods.tex
@@ -50,6 +50,25 @@ and performs in several Muscovite bands. This band also stands out because it
 uses piano's and synthesizers. The droning synthesizers often operate in the
 same frequency as the vocals.
 
+The training and test data is divided as follows:
+\begin{table}[H]
+	\centering
+	\begin{tabular}{lcc}
+		\toprule
+		Singing & Instrumental\\
+		\midrule
+		0.59 & 0.41\\
+		\bottomrule
+	\end{tabular}
+	\quad
+	\begin{tabular}{lcccc}
+		\toprule
+		Instrumental & CC & DG & WDISS\\
+		\midrule
+		\bottomrule
+	\end{tabular}
+\end{table}
+
 \section{\gls{MFCC} Features}
 The waveforms in itself are not very suitable to be used as features due to the
 high dimensionality and correlation. Therefore we use the often used
@@ -84,9 +103,26 @@ steps.
 \section{\gls{ANN} Classifier}
 \todo{Spectrals might be enough, no decorrelation}
 
-\section{Model training}
-
 \section{Experiments}
+\subsection{\emph{Singing} voice detection}
+The first type of experiment conducted is \emph{Singing} voice detection. This
+is the act of segmenting an audio signal into segments that are labeled either
+as \emph{Singing} or as \emph{Instrumental}. The input of the classifier is a
+feature vector and the output is the probability that singing is happening in
+the sample.
+
+\begin{figure}[H]
+	\centering
+	\includegraphics[width=.5\textwidth]{bcann}
+	\caption{Binary classifier network architecture}\label{fig:bcann}
+\end{figure}
+
+\subsection{\emph{Singer} voice detection}
+The second type of experiment conducted is \emph{Singer} voice detection. This
+is the act of segmenting an audio signal into segments that are labeled either
+with the name of the singer or as \emph{Instrumental}. The input of the
+classifier is a feature vector and the outputs are probabilities for each of
+the singers and a probability for the instrumental label.
 
 \section{Results}
 
-- 
2.20.1