% ICASSP paper on the VocaListener / VocaWatcher singing-imitation systems.
% The citation key is kept as-is so existing cite commands keep resolving, even
% though the doi and url fields both identify the ICASSP 2012 proceedings.
% Year, venue and month below follow that 2012 edition.
% NOTE(review): the page range and the Kyoto / March venue data come from the
% published proceedings record, not from this file -- confirm against the DOI
% landing page before relying on them.
@InProceedings{goto:icassp:2011,
  author    = {Goto, Masataka and Nakano, Tomoyasu and Kajita, Shuuji and Matsusaka, Yosuke and Nakaoka, Shin'ichiro and Yokoi, Kazuhito},
  title     = {{VocaListener} and {VocaWatcher}: Imitating a Human Singer by Using Signal Processing},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing},
  year      = {2012},
  pages     = {5393--5396},
  address   = {Kyoto, Japan},
  month     = mar,
  url       = {https://staff.aist.go.jp/t.nakano/PAPER/ICASSP2012goto.pdf},
  keywords  = {Music, singing information processing, singing synthesis, singing robot},
  doi       = {10.1109/ICASSP.2012.6289140},
  abstract  = {In this paper, we describe three singing information processing systems, VocaListener, VocaListener2, and VocaWatcher, that imitate singing expressions of the voice and face of a human singer. VocaListener can synthesize natural singing voices by analyzing and imitating the pitch and dynamics of the human singing. VocaListener2 imitates temporal timbre changes in addition to the pitch and dynamics. In synchronization with the synthesized singing voices, VocaWatcher can generate realistic facial motions of a humanoid robot, the HRP-4C, by analyzing and imitating facial motions of a human singing that are recorded by a single video camera. These systems that focus on ``imitation'' are not only promising for representing human-like naturalness, but also useful for providing intuitive control means.}
}