From: Mart Lubbers Date: Tue, 11 Apr 2017 17:48:36 +0000 (+0200) Subject: brush up introduction X-Git-Url: https://git.martlubbers.net/?a=commitdiff_plain;h=ee5242ed1d749374a68ac1c49e578c12bc4a68d7;p=asr1617.git brush up introduction --- diff --git a/Makefile b/Makefile index f40e93c..15d03f8 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ all: $(addsuffix .pdf,$(DOCS)) $(LATEX) $(LATEXFLAGS) -ini -jobname="$(basename $@)" "&$(LATEX) $<\dump" %.pdf: %.mlog - if $(GREP) -iFq 'Rerun to get' $<; then $(LATEX) $(LATEXFLAGS) $(basename $<); fi + if $(GREP) -iFq 'Rerun' $<; then $(LATEX) $(LATEXFLAGS) $(basename $<); fi %.mlog: %.tex %.fmt $(wildcard *.bib) $(wildcard *.tex) $(LATEX) $(LATEXFLAGS) $< diff --git a/asr.bib b/asr.bib index 594ead2..d505823 100644 --- a/asr.bib +++ b/asr.bib @@ -232,4 +232,37 @@ author = {Scheirer, Eric and Slaney, Malcolm}, year = {1997}, pages = {1331--1334} +} + +@inproceedings{kato_acoustic_2013, + title = {Acoustic {Features} and {Auditory} {Impressions} of {Death} {Growl} and {Screaming} {Voice}}, + isbn = {978-0-7695-5120-3}, + url = {http://ieeexplore.ieee.org/document/6846676/}, + doi = {10.1109/IIH-MSP.2013.120}, + urldate = {2017-04-11}, + publisher = {IEEE}, + author = {Kato, Keizo and Ito, Akinori}, + month = oct, + year = {2013}, + pages = {460--463}, + file = {Acoustic Features and Auditory Impressions of Death Growl and Screaming Voice - 06846676.pdf:/home/mrl/.mozilla/firefox/7b4r727h.default-1470981082057/zotero/storage/VAT5AGPP/06846676.pdf:application/pdf} +} + +@inproceedings{sakakibara_growl_2004, + title = {Growl voice in ethnic and pop styles}, + url = {http://www.overtone.cc/profiles/blogs/884327:BlogPost:7416}, + urldate = {2017-04-11}, + booktitle = {Proc. {Int}. {Symp}. on {Musical} {Acoustics}}, + author = {Sakakibara, K. 
and Fuks, Leonardo and Imagawa, Hiroshi and Tayama, Niro and Naganuma, D.}, + year = {2004}, + file = {isma04.pdf:/home/mrl/.mozilla/firefox/7b4r727h.default-1470981082057/zotero/storage/PUFH652B/isma04.pdf:application/pdf} +} + +@misc{friis_vikings_2004, + title = {Vikings and their {Music}}, + url = {http://www.viking.no/e/life/music/e-musikk-mogens.html}, + urldate = {2017-04-11}, + author = {Friis, Mogens}, + year = {2004}, + file = {Vikings and their Music:/home/mrl/.mozilla/firefox/7b4r727h.default-1470981082057/zotero/storage/SEEXI3VR/e-musikk-mogens.html:text/html} } \ No newline at end of file diff --git a/asr.pre b/asr.pre index fbb3c42..6ec46d4 100644 --- a/asr.pre +++ b/asr.pre @@ -8,6 +8,7 @@ \usepackage{booktabs} % Better looking tables \usepackage{todonotes} % Todo's \usepackage{float} % Floating tables +\usepackage{csquotes} % Typeset quotes \graphicspath{{img/}} diff --git a/asr.tex b/asr.tex index 4899bd1..1316967 100644 --- a/asr.tex +++ b/asr.tex @@ -64,6 +64,21 @@ along. Because of this interest it is very useful to device automatic techniques for segmenting instrumental and vocal parts of a song and apply forced alignment or even lyrics recognition on the audio file. +Such techniques are heavily researched and working systems have been created. +However, these techniques are designed to detect a clean singing voice. Extreme +genres such as \gls{dm} are using more extreme vocal techniques such as +grunting or growling. It must be noted that grunting is not a technique only +used in extreme metal styles. Similar or equal techniques have been used in +\emph{Beijing opera} and Japanese \emph{Noh}, but also in more western styles like +jazz singing by Louis Armstrong~\cite{sakakibara_growl_2004}. It might even be +traced back to Viking times. An Arab merchant wrote in the tenth +century~\cite{friis_vikings_2004}: + +\begin{displayquote} + Never before I have heard uglier songs than those of the Vikings in + Slesvig. 
The growling sound coming from their throats reminds me of dogs + howling, only more untamed. +\end{displayquote} %A majority of the music is not only instrumental but also contains vocal %segments. @@ -103,11 +118,19 @@ separate singing from instrumental music\cite{berenzweig_locating_2001}. ~\cite{pedone_phoneme-level_2011} ~\cite{yang_machine_2012} + + \section{Research question} -This leads to the following research question: +It is debatable whether the aforementioned techniques work because the +spectral properties of a growling voice are different from the spectral +properties of a clean singing voice. It has been found that growling voices +have less prominent peaks in the frequency representation and are closer to +noise than clean singing\cite{kato_acoustic_2013}. This leads us to the +research question: + \begin{center}\em% Are standard \gls{ANN} based techniques for singing voice detection - suitable for non-standard musical genres like Death metal. + suitable for non-standard musical genres like \gls{dm}? \end{center} \chapter{Methods} @@ -115,8 +138,8 @@ This leads to the following research question: %Experiment(s) (set-up, data, results, discussion) \section{Data \& Preprocessing} -To run the experiments we have collected data from several \gls{dm} albums. The -exact data used is available in Appendix~\ref{app:data}. The albums are +To run the experiments, data has been collected from several \gls{dm} albums. +The exact data used is available in Appendix~\ref{app:data}. The albums are extracted from the audio CD and converted to a mono channel waveform with the correct samplerate \emph{SoX}~\footnote{\url{http://sox.sourceforge.net/}}. When the waveforms are finished they are converted to \glspl{MFCC} vectors @@ -124,8 +147,8 @@ using the \emph{python\_speech\_features}% ~\footnote{\url{https://github.com/jameslyons/python_speech_features}} package. 
All these steps combined results in thirteen tab separated features per line in a file for every source file. Every file is annotated using -Praat~\cite{boersma_praat_2002} where the utterances are manually -aligned to the audio. An example of an utterances are shown in +Praat~\cite{boersma_praat_2002} where the utterances are manually aligned to +the audio. Examples of utterances are shown in Figures~\ref{fig:bloodstained,fig:abominations}. It is clearly visible that within the genre of death metal there are a lot of different spectral patterns visible.