Add push to talk voice assistant (#4648)

* Add push to talk voice assistant

* Refactor most code into voice_assistant

* Make voice_assistant the component and remove push_to_talk (can be done in yaml)

* Fix component setup

* Always AF_INET to match serverside

* Fix microphone and media player co-existence

* Format

* Update codeowners

* Update test file

* Fix endifs

* nullptr not NULL

* clang-tidy

* Format

* fixup: Add VA event data

* Generate proto

* Parse and log events

* Add default to switch

* Fix

* Add mic/va to test5
This commit is contained in:
Jesse Hills
2023-04-12 11:45:10 +12:00
committed by GitHub
parent 80bc567c31
commit b60c08dd28
35 changed files with 1384 additions and 75 deletions
+70
View File
@@ -0,0 +1,70 @@
import esphome.config_validation as cv
import esphome.final_validate as fv
import esphome.codegen as cg
from esphome import pins
from esphome.const import CONF_ID
from esphome.components.esp32 import get_esp32_variant
from esphome.components.esp32.const import (
VARIANT_ESP32,
VARIANT_ESP32S2,
VARIANT_ESP32S3,
VARIANT_ESP32C3,
)
# Component metadata read by the ESPHome loader.
CODEOWNERS = ["@jesserockz"]
DEPENDENCIES = ["esp32"]
# Several i2s_audio entries may be configured; _final_validate below limits
# how many are accepted for the selected chip variant.
MULTI_CONF = True
# Configuration keys. The pin and id keys are imported by the platform
# sub-packages (media player, microphone) via `from .. import ...`.
CONF_I2S_DOUT_PIN = "i2s_dout_pin"
CONF_I2S_DIN_PIN = "i2s_din_pin"
CONF_I2S_BCLK_PIN = "i2s_bclk_pin"
CONF_I2S_LRCLK_PIN = "i2s_lrclk_pin"
CONF_I2S_AUDIO = "i2s_audio"
CONF_I2S_AUDIO_ID = "i2s_audio_id"
# Codegen handles for the C++ namespace and classes declared in i2s_audio.h.
i2s_audio_ns = cg.esphome_ns.namespace("i2s_audio")
I2SAudioComponent = i2s_audio_ns.class_("I2SAudioComponent", cg.Component)
I2SAudioIn = i2s_audio_ns.class_("I2SAudioIn", cg.Parented.template(I2SAudioComponent))
I2SAudioOut = i2s_audio_ns.class_(
"I2SAudioOut", cg.Parented.template(I2SAudioComponent)
)
# Number of I2S ports per ESP32 variant; used as the upper bound on the number
# of configured i2s_audio entries.
# https://github.com/espressif/esp-idf/blob/master/components/soc/{variant}/include/soc/soc_caps.h
I2S_PORTS = {
VARIANT_ESP32: 2,
VARIANT_ESP32S2: 1,
VARIANT_ESP32S3: 2,
VARIANT_ESP32C3: 1,
}
# Schema for one i2s_audio entry: declares an I2SAudioComponent and requires
# the two clock pins (BCLK and LRCLK) that to_code forwards to the component.
CONFIG_SCHEMA = cv.Schema(
{
cv.GenerateID(): cv.declare_id(I2SAudioComponent),
cv.Required(CONF_I2S_BCLK_PIN): pins.internal_gpio_output_pin_number,
cv.Required(CONF_I2S_LRCLK_PIN): pins.internal_gpio_output_pin_number,
}
)
def _final_validate(_):
    """Reject configurations that declare more I2S buses than the chip has.

    Looks up every ``i2s_audio`` entry in the full configuration and compares
    the count with the ``I2S_PORTS`` limit for the selected ESP32 variant.

    Raises:
        cv.Invalid: if the variant is not listed in ``I2S_PORTS`` or if too
            many ``i2s_audio`` entries are configured.
    """
    configured_buses = fv.full_config.get()[CONF_I2S_AUDIO]
    variant = get_esp32_variant()

    port_limit = I2S_PORTS.get(variant)
    if port_limit is None:
        raise cv.Invalid(f"Unsupported variant {variant}")

    if len(configured_buses) > port_limit:
        raise cv.Invalid(
            f"Only {port_limit} I2S audio ports are supported on {variant}"
        )


FINAL_VALIDATE_SCHEMA = _final_validate
async def to_code(config):
    """Generate the C++ for one i2s_audio entry.

    Creates and registers the component, then emits the calls that hand it the
    configured BCLK and LRCLK pin numbers.
    """
    component = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(component, config)

    set_bclk = component.set_bclk_pin(config[CONF_I2S_BCLK_PIN])
    cg.add(set_bclk)
    set_lrclk = component.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN])
    cg.add(set_lrclk)
@@ -0,0 +1,30 @@
#include "i2s_audio.h"
#ifdef USE_ESP32
#include "esphome/core/log.h"
namespace esphome {
namespace i2s_audio {
static const char *const TAG = "i2s_audio";
void I2SAudioComponent::setup() {
  // Port numbers are handed out in setup order from a counter shared by all
  // instances; each successful setup() consumes the next one.
  static i2s_port_t next_port_num = I2S_NUM_0;

  if (next_port_num < I2S_NUM_MAX) {
    this->port_ = next_port_num;
    next_port_num = static_cast<i2s_port_t>(next_port_num + 1);
    ESP_LOGCONFIG(TAG, "Setting up I2S Audio...");
    return;
  }

  // Every hardware port is already taken by an earlier component.
  ESP_LOGE(TAG, "Too many I2S Audio components!");
  this->mark_failed();
}
} // namespace i2s_audio
} // namespace esphome
#endif // USE_ESP32
+64
View File
@@ -0,0 +1,64 @@
#pragma once
#ifdef USE_ESP32
#include <driver/i2s.h>
#include "esphome/core/component.h"
#include "esphome/core/helpers.h"
namespace esphome {
namespace i2s_audio {
// Forward declaration so the helper bases below can name the component type
// before its full definition.
class I2SAudioComponent;
// Base for devices that register with an I2SAudioComponent as its audio input;
// the component sets itself as their parent on registration.
class I2SAudioIn : public Parented<I2SAudioComponent> {};
// Base for devices that register with an I2SAudioComponent as its audio output;
// the component sets itself as their parent on registration.
class I2SAudioOut : public Parented<I2SAudioComponent> {};
/// One I2S bus: owns the port number, the shared clock pins and a lock that
/// the attached input/output devices use to take turns on the port.
class I2SAudioComponent : public Component {
 public:
  void setup() override;

  /// Attach `in` as this bus's input device and make this component its parent.
  void register_audio_in(I2SAudioIn *in) {
    in->set_parent(this);
    this->audio_in_ = in;
  }

  /// Attach `out` as this bus's output device and make this component its parent.
  void register_audio_out(I2SAudioOut *out) {
    out->set_parent(this);
    this->audio_out_ = out;
  }

  /// Pin configuration with only the clock pins filled in; callers set the
  /// data pin they need before passing it to the driver.
  i2s_pin_config_t get_pin_config() const {
    i2s_pin_config_t pin_config{};
    pin_config.mck_io_num = I2S_PIN_NO_CHANGE;
    pin_config.bck_io_num = this->bclk_pin_;
    pin_config.ws_io_num = this->lrclk_pin_;
    pin_config.data_out_num = I2S_PIN_NO_CHANGE;
    pin_config.data_in_num = I2S_PIN_NO_CHANGE;
    return pin_config;
  }

  void set_bclk_pin(uint8_t pin) { this->bclk_pin_ = pin; }
  void set_lrclk_pin(uint8_t pin) { this->lrclk_pin_ = pin; }

  // Thin wrappers around the bus mutex.
  void lock() { this->lock_.lock(); }
  bool try_lock() { return this->lock_.try_lock(); }
  void unlock() { this->lock_.unlock(); }

  /// Port number assigned in setup().
  i2s_port_t get_port() const { return this->port_; }

 protected:
  Mutex lock_;

  I2SAudioIn *audio_in_{nullptr};
  I2SAudioOut *audio_out_{nullptr};

  uint8_t bclk_pin_;
  uint8_t lrclk_pin_;
  i2s_port_t port_{};
};
} // namespace i2s_audio
} // namespace esphome
#endif // USE_ESP32
@@ -5,22 +5,25 @@ import esphome.config_validation as cv
from esphome import pins
from esphome.const import CONF_ID, CONF_MODE
from esphome.core import CORE
from .. import (
i2s_audio_ns,
I2SAudioComponent,
I2SAudioOut,
CONF_I2S_AUDIO_ID,
CONF_I2S_DOUT_PIN,
)
CODEOWNERS = ["@jesserockz"]
DEPENDENCIES = ["esp32"]
i2s_audio_ns = cg.esphome_ns.namespace("i2s_audio")
DEPENDENCIES = ["i2s_audio"]
I2SAudioMediaPlayer = i2s_audio_ns.class_(
"I2SAudioMediaPlayer", cg.Component, media_player.MediaPlayer
"I2SAudioMediaPlayer", cg.Component, media_player.MediaPlayer, I2SAudioOut
)
i2s_dac_mode_t = cg.global_ns.enum("i2s_dac_mode_t")
CONF_I2S_DOUT_PIN = "i2s_dout_pin"
CONF_I2S_BCLK_PIN = "i2s_bclk_pin"
CONF_I2S_LRCLK_PIN = "i2s_lrclk_pin"
CONF_MUTE_PIN = "mute_pin"
CONF_AUDIO_ID = "audio_id"
CONF_DAC_TYPE = "dac_type"
@@ -48,34 +51,26 @@ def validate_esp32_variant(config):
CONFIG_SCHEMA = cv.All(
cv.typed_schema(
{
"internal": cv.Schema(
"internal": media_player.MEDIA_PLAYER_SCHEMA.extend(
{
cv.GenerateID(): cv.declare_id(I2SAudioMediaPlayer),
cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent),
cv.Required(CONF_MODE): cv.enum(INTERNAL_DAC_OPTIONS, lower=True),
}
)
.extend(media_player.MEDIA_PLAYER_SCHEMA)
.extend(cv.COMPONENT_SCHEMA),
"external": cv.Schema(
).extend(cv.COMPONENT_SCHEMA),
"external": media_player.MEDIA_PLAYER_SCHEMA.extend(
{
cv.GenerateID(): cv.declare_id(I2SAudioMediaPlayer),
cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent),
cv.Required(
CONF_I2S_DOUT_PIN
): pins.internal_gpio_output_pin_number,
cv.Required(
CONF_I2S_BCLK_PIN
): pins.internal_gpio_output_pin_number,
cv.Required(
CONF_I2S_LRCLK_PIN
): pins.internal_gpio_output_pin_number,
cv.Optional(CONF_MUTE_PIN): pins.gpio_output_pin_schema,
cv.Optional(CONF_MODE, default="mono"): cv.one_of(
*EXTERNAL_DAC_OPTIONS, lower=True
),
}
)
.extend(media_player.MEDIA_PLAYER_SCHEMA)
.extend(cv.COMPONENT_SCHEMA),
).extend(cv.COMPONENT_SCHEMA),
},
key=CONF_DAC_TYPE,
),
@@ -89,19 +84,19 @@ async def to_code(config):
await cg.register_component(var, config)
await media_player.register_media_player(var, config)
parent = await cg.get_variable(config[CONF_I2S_AUDIO_ID])
cg.add(parent.register_audio_out(var))
if config[CONF_DAC_TYPE] == "internal":
cg.add(var.set_internal_dac_mode(config[CONF_MODE]))
else:
cg.add(var.set_dout_pin(config[CONF_I2S_DOUT_PIN]))
cg.add(var.set_bclk_pin(config[CONF_I2S_BCLK_PIN]))
cg.add(var.set_lrclk_pin(config[CONF_I2S_LRCLK_PIN]))
if CONF_MUTE_PIN in config:
pin = await cg.gpio_pin_expression(config[CONF_MUTE_PIN])
cg.add(var.set_mute_pin(pin))
cg.add(var.set_external_dac_channels(2 if config[CONF_MODE] == "stereo" else 1))
if CORE.is_esp32:
cg.add_library("WiFiClientSecure", None)
cg.add_library("HTTPClient", None)
cg.add_library("esphome/ESP32-audioI2S", "2.0.6")
cg.add_build_flag("-DAUDIO_NO_SD_FS")
cg.add_library("WiFiClientSecure", None)
cg.add_library("HTTPClient", None)
cg.add_library("esphome/ESP32-audioI2S", "2.0.6")
cg.add_build_flag("-DAUDIO_NO_SD_FS")
@@ -11,11 +11,19 @@ static const char *const TAG = "audio";
void I2SAudioMediaPlayer::control(const media_player::MediaPlayerCall &call) {
if (call.get_media_url().has_value()) {
if (this->audio_->isRunning())
this->audio_->stopSong();
this->high_freq_.start();
this->audio_->connecttohost(call.get_media_url().value().c_str());
this->state = media_player::MEDIA_PLAYER_STATE_PLAYING;
this->current_url_ = call.get_media_url();
if (this->state == media_player::MEDIA_PLAYER_STATE_PLAYING && this->audio_ != nullptr) {
if (this->audio_->isRunning()) {
this->audio_->stopSong();
}
this->audio_->connecttohost(this->current_url_.value().c_str());
} else {
this->start();
}
}
if (this->i2s_state_ != I2S_STATE_RUNNING) {
return;
}
if (call.get_volume().has_value()) {
this->volume = call.get_volume().value();
@@ -35,7 +43,7 @@ void I2SAudioMediaPlayer::control(const media_player::MediaPlayerCall &call) {
this->state = media_player::MEDIA_PLAYER_STATE_PAUSED;
break;
case media_player::MEDIA_PLAYER_COMMAND_STOP:
this->stop_();
this->stop();
break;
case media_player::MEDIA_PLAYER_COMMAND_MUTE:
this->mute_();
@@ -94,22 +102,51 @@ void I2SAudioMediaPlayer::set_volume_(float volume, bool publish) {
this->volume = volume;
}
void I2SAudioMediaPlayer::stop_() {
if (this->audio_->isRunning())
this->audio_->stopSong();
this->high_freq_.stop();
void I2SAudioMediaPlayer::setup() {
ESP_LOGCONFIG(TAG, "Setting up Audio...");
this->state = media_player::MEDIA_PLAYER_STATE_IDLE;
}
void I2SAudioMediaPlayer::setup() {
ESP_LOGCONFIG(TAG, "Setting up Audio...");
void I2SAudioMediaPlayer::loop() {
switch (this->i2s_state_) {
case I2S_STATE_STARTING:
this->start_();
break;
case I2S_STATE_RUNNING:
this->play_();
break;
case I2S_STATE_STOPPING:
this->stop_();
break;
case I2S_STATE_STOPPED:
break;
}
}
void I2SAudioMediaPlayer::play_() {
  // Let the audio library do its per-iteration work first.
  this->audio_->loop();

  if (this->state != media_player::MEDIA_PLAYER_STATE_PLAYING) {
    return;
  }
  if (this->audio_->isRunning()) {
    return;
  }
  // We were playing but the decoder is no longer running: request a stop.
  this->stop();
}
// Request a start; only flags the state, loop() then calls start_() to do the work.
void I2SAudioMediaPlayer::start() { this->i2s_state_ = I2S_STATE_STARTING; }
void I2SAudioMediaPlayer::start_() {
  // Take exclusive use of the shared I2S bus before touching the port.
  // try_lock() reports whether the lock was acquired, so bail out only when it
  // was NOT; i2s_state_ stays STARTING and loop() retries on the next pass.
  // stop_() releases the lock again.
  if (!this->parent_->try_lock()) {
    return;  // Waiting for another i2s to return lock
  }
#if SOC_I2S_SUPPORTS_DAC
if (this->internal_dac_mode_ != I2S_DAC_CHANNEL_DISABLE) {
this->audio_ = make_unique<Audio>(true, this->internal_dac_mode_);
this->audio_ = make_unique<Audio>(true, this->internal_dac_mode_, this->parent_->get_port());
} else {
#endif
this->audio_ = make_unique<Audio>(false);
this->audio_->setPinout(this->bclk_pin_, this->lrclk_pin_, this->dout_pin_);
this->audio_ = make_unique<Audio>(false, I2S_DAC_CHANNEL_BOTH_EN, this->parent_->get_port());
i2s_pin_config_t pin_config = this->parent_->get_pin_config();
pin_config.data_out_num = this->dout_pin_;
i2s_set_pin(this->parent_->get_port(), &pin_config);
this->audio_->forceMono(this->external_dac_channels_ == 1);
if (this->mute_pin_ != nullptr) {
this->mute_pin_->setup();
@@ -118,16 +155,30 @@ void I2SAudioMediaPlayer::setup() {
#if SOC_I2S_SUPPORTS_DAC
}
#endif
this->state = media_player::MEDIA_PLAYER_STATE_IDLE;
}
void I2SAudioMediaPlayer::loop() {
this->audio_->loop();
if (this->state == media_player::MEDIA_PLAYER_STATE_PLAYING && !this->audio_->isRunning()) {
this->stop_();
this->i2s_state_ = I2S_STATE_RUNNING;
this->high_freq_.start();
if (this->current_url_.has_value()) {
this->audio_->connecttohost(this->current_url_.value().c_str());
this->state = media_player::MEDIA_PLAYER_STATE_PLAYING;
this->publish_state();
}
}
// Request a stop; only flags the state, loop() then calls stop_() to do the teardown.
void I2SAudioMediaPlayer::stop() { this->i2s_state_ = I2S_STATE_STOPPING; }
void I2SAudioMediaPlayer::stop_() {
  if (this->audio_->isRunning()) {
    // Still decoding: ask the library to stop and finish the teardown on a
    // later loop() pass (the state remains STOPPING).
    this->audio_->stopSong();
  } else {
    // Playback has ended: drop the Audio object, forget the URL and give the
    // I2S bus back before reporting idle.
    this->audio_.reset();
    this->current_url_ = {};
    this->parent_->unlock();
    this->i2s_state_ = I2S_STATE_STOPPED;

    this->high_freq_.stop();
    this->state = media_player::MEDIA_PLAYER_STATE_IDLE;
    this->publish_state();
  }
}
media_player::MediaPlayerTraits I2SAudioMediaPlayer::get_traits() {
auto traits = media_player::MediaPlayerTraits();
@@ -2,6 +2,10 @@
#ifdef USE_ESP32_FRAMEWORK_ARDUINO
#include "../i2s_audio.h"
#include <driver/i2s.h>
#include "esphome/components/media_player/media_player.h"
#include "esphome/core/component.h"
#include "esphome/core/gpio.h"
@@ -12,7 +16,14 @@
namespace esphome {
namespace i2s_audio {
class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer {
/// State of the media player's use of the I2S port.
enum I2SState : uint8_t {
  I2S_STATE_STOPPED = 0,   // Port not in use.
  I2S_STATE_STARTING = 1,  // Start requested.
  I2S_STATE_RUNNING = 2,   // Port in use.
  I2S_STATE_STOPPING = 3,  // Stop requested.
};
class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer, public I2SAudioOut {
public:
void setup() override;
float get_setup_priority() const override { return esphome::setup_priority::LATE; }
@@ -22,8 +33,6 @@ class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer {
void dump_config() override;
void set_dout_pin(uint8_t pin) { this->dout_pin_ = pin; }
void set_bclk_pin(uint8_t pin) { this->bclk_pin_ = pin; }
void set_lrclk_pin(uint8_t pin) { this->lrclk_pin_ = pin; }
void set_mute_pin(GPIOPin *mute_pin) { this->mute_pin_ = mute_pin; }
#if SOC_I2S_SUPPORTS_DAC
void set_internal_dac_mode(i2s_dac_mode_t mode) { this->internal_dac_mode_ = mode; }
@@ -34,20 +43,24 @@ class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer {
bool is_muted() const override { return this->muted_; }
void start();
void stop();
protected:
void control(const media_player::MediaPlayerCall &call) override;
void mute_();
void unmute_();
void set_volume_(float volume, bool publish = true);
void stop_();
void start_();
void stop_();
void play_();
I2SState i2s_state_{I2S_STATE_STOPPED};
std::unique_ptr<Audio> audio_;
uint8_t dout_pin_{0};
uint8_t din_pin_{0};
uint8_t bclk_pin_;
uint8_t lrclk_pin_;
GPIOPin *mute_pin_{nullptr};
bool muted_{false};
@@ -59,6 +72,8 @@ class I2SAudioMediaPlayer : public Component, public media_player::MediaPlayer {
uint8_t external_dac_channels_;
HighFrequencyLoopRequester high_freq_;
optional<std::string> current_url_{};
};
} // namespace i2s_audio
@@ -0,0 +1,41 @@
import esphome.config_validation as cv
import esphome.codegen as cg
from esphome import pins
from esphome.const import CONF_ID
from esphome.components import microphone
from .. import (
i2s_audio_ns,
I2SAudioComponent,
I2SAudioIn,
CONF_I2S_AUDIO_ID,
CONF_I2S_DIN_PIN,
)
# Component metadata read by the ESPHome loader.
CODEOWNERS = ["@jesserockz"]
DEPENDENCIES = ["i2s_audio"]
# Codegen handle for the C++ class, listing the bases it is declared with in
# i2s_audio_microphone.h.
I2SAudioMicrophone = i2s_audio_ns.class_(
"I2SAudioMicrophone", I2SAudioIn, microphone.Microphone, cg.Component
)
# Schema for the I2S microphone platform.
# The DIN line carries data from the microphone into the chip, so it is
# validated as an input pin. The output-pin validator used previously would
# presumably reject input-only GPIOs that are fine for this role.
CONFIG_SCHEMA = microphone.MICROPHONE_SCHEMA.extend(
    {
        cv.GenerateID(): cv.declare_id(I2SAudioMicrophone),
        cv.GenerateID(CONF_I2S_AUDIO_ID): cv.use_id(I2SAudioComponent),
        cv.Required(CONF_I2S_DIN_PIN): pins.internal_gpio_input_pin_number,
    }
).extend(cv.COMPONENT_SCHEMA)
async def to_code(config):
    """Generate the C++ for one I2S microphone.

    Creates and registers the microphone component, attaches it to its parent
    I2S bus as the audio input, passes on the DIN pin and finally registers it
    with the microphone core component.
    """
    mic = cg.new_Pvariable(config[CONF_ID])
    await cg.register_component(mic, config)

    i2s_bus = await cg.get_variable(config[CONF_I2S_AUDIO_ID])
    cg.add(i2s_bus.register_audio_in(mic))

    din_pin = config[CONF_I2S_DIN_PIN]
    cg.add(mic.set_din_pin(din_pin))

    await microphone.register_microphone(mic, config)
@@ -0,0 +1,101 @@
#include "i2s_audio_microphone.h"
#ifdef USE_ESP32
#include <driver/i2s.h>
#include "esphome/core/hal.h"
#include "esphome/core/log.h"
namespace esphome {
namespace i2s_audio {
static const size_t BUFFER_SIZE = 512;
static const char *const TAG = "i2s_audio.microphone";
// Size the read buffer once up front; read_() reads up to BUFFER_SIZE bytes into it.
void I2SAudioMicrophone::setup() {
ESP_LOGCONFIG(TAG, "Setting up I2S Audio Microphone...");
this->buffer_.resize(BUFFER_SIZE);
}
// Request a start; only flags the state, loop() then calls start_() to do the work.
void I2SAudioMicrophone::start() { this->state_ = microphone::STATE_STARTING; }
void I2SAudioMicrophone::start_() {
if (!this->parent_->try_lock()) {
return; // Waiting for another i2s to return lock
}
i2s_driver_config_t config = {
.mode = (i2s_mode_t) (I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM),
.sample_rate = 16000,
.bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
.channel_format = I2S_CHANNEL_FMT_ONLY_RIGHT,
.communication_format = I2S_COMM_FORMAT_STAND_I2S,
.intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
.dma_buf_count = 4,
.dma_buf_len = 256,
.use_apll = false,
.tx_desc_auto_clear = false,
.fixed_mclk = 0,
.mclk_multiple = I2S_MCLK_MULTIPLE_DEFAULT,
.bits_per_chan = I2S_BITS_PER_CHAN_DEFAULT,
};
i2s_driver_install(this->parent_->get_port(), &config, 0, nullptr);
i2s_pin_config_t pin_config = this->parent_->get_pin_config();
pin_config.data_in_num = this->din_pin_;
i2s_set_pin(this->parent_->get_port(), &pin_config);
this->state_ = microphone::STATE_RUNNING;
this->high_freq_.start();
}
void I2SAudioMicrophone::stop() {
  // Nothing to tear down when already stopped; otherwise flag the request and
  // let loop() run stop_().
  if (this->state_ != microphone::STATE_STOPPED) {
    this->state_ = microphone::STATE_STOPPING;
  }
}
void I2SAudioMicrophone::stop_() {
i2s_stop(this->parent_->get_port());
i2s_driver_uninstall(this->parent_->get_port());
this->parent_->unlock();
this->state_ = microphone::STATE_STOPPED;
this->high_freq_.stop();
}
void I2SAudioMicrophone::read_() {
size_t bytes_read = 0;
esp_err_t err =
i2s_read(this->parent_->get_port(), this->buffer_.data(), BUFFER_SIZE, &bytes_read, (100 / portTICK_PERIOD_MS));
if (err != ESP_OK) {
ESP_LOGW(TAG, "Error reading from I2S microphone: %s", esp_err_to_name(err));
this->status_set_warning();
return;
}
this->status_clear_warning();
this->data_callbacks_.call(this->buffer_);
}
void I2SAudioMicrophone::loop() {
  // Advance the microphone state machine one step; STATE_STOPPED needs no work.
  const auto current = this->state_;
  if (current == microphone::STATE_STARTING) {
    this->start_();
  } else if (current == microphone::STATE_RUNNING) {
    this->read_();
  } else if (current == microphone::STATE_STOPPING) {
    this->stop_();
  }
}
} // namespace i2s_audio
} // namespace esphome
#endif // USE_ESP32
@@ -0,0 +1,37 @@
#pragma once
#ifdef USE_ESP32
#include "../i2s_audio.h"
#include "esphome/components/microphone/microphone.h"
#include "esphome/core/component.h"
namespace esphome {
namespace i2s_audio {
// Microphone that reads samples from the parent I2SAudioComponent's port.
// start()/stop() only request a state change; loop() performs the actual
// start_/read_/stop_ work according to the current state.
class I2SAudioMicrophone : public I2SAudioIn, public microphone::Microphone, public Component {
public:
void setup() override;
void start() override;
void stop() override;
void loop() override;
// GPIO number used as the I2S data-in pin.
void set_din_pin(uint8_t pin) { this->din_pin_ = pin; }
protected:
// Step handlers invoked from loop().
void start_();
void stop_();
void read_();
uint8_t din_pin_{0};
// Destination for i2s_read(); sized in setup().
std::vector<uint8_t> buffer_;
// Started in start_() and stopped in stop_().
HighFrequencyLoopRequester high_freq_;
};
} // namespace i2s_audio
} // namespace esphome
#endif // USE_ESP32