diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
index ed5765c1..e48b7b3a 100644
--- a/api/apps/llm_app.py
+++ b/api/apps/llm_app.py
@@ -122,6 +122,17 @@ def add_llm():
                         f'"hunyuan_sk": "{req.get("hunyuan_sk", "")}"' + '}'
         req["api_key"] = api_key
         return set_api_key()
+    elif factory == "Tencent Cloud":
+        # Serialize with json.dumps (imported under an alias so the name
+        # `json` is never rebound inside add_llm): a quote or backslash in a
+        # secret must not yield a string that json.loads() later rejects.
+        from json import dumps as _dumps
+
+        api_key = _dumps({
+            "tencent_cloud_sid": req.get("tencent_cloud_sid", ""),
+            "tencent_cloud_sk": req.get("tencent_cloud_sk", ""),
+        })
+        req["api_key"] = api_key
     elif factory == "Bedrock":
         # For Bedrock, due to its special authentication method
         # Assemble bedrock_ak, bedrock_sk, bedrock_region
diff --git a/conf/llm_factories.json b/conf/llm_factories.json
index 0082a3f7..dd09833a 100644
--- a/conf/llm_factories.json
+++ b/conf/llm_factories.json
@@ -3233,6 +3233,13 @@
             "tags": "TTS",
             "status": "1",
             "llm": []
+        },
+        {
+            "name": "Tencent Cloud",
+            "logo": "",
+            "tags": "SPEECH2TEXT",
+            "status": "1",
+            "llm": []
         }
     ]
 }
diff --git a/rag/llm/__init__.py b/rag/llm/__init__.py
index 73fad7cd..adcb53f1 100644
--- a/rag/llm/__init__.py
+++ b/rag/llm/__init__.py
@@ -128,7 +128,8 @@ Seq2txtModel = {
     "Tongyi-Qianwen": QWenSeq2txt,
     "Ollama": OllamaSeq2txt,
     "Azure-OpenAI": AzureSeq2txt,
-    "Xinference": XinferenceSeq2txt
+    "Xinference": XinferenceSeq2txt,
+    "Tencent Cloud": TencentCloudSeq2txt
 }
 
 TTSModel = {
diff --git a/rag/llm/sequence2txt_model.py b/rag/llm/sequence2txt_model.py
index 45362ad2..626a65e4 100644
--- a/rag/llm/sequence2txt_model.py
+++ b/rag/llm/sequence2txt_model.py
@@ -22,7 +22,8 @@ from openai import OpenAI
 import os
 import json
 from rag.utils import num_tokens_from_string
-
+import base64
+import re
 
 class Base(ABC):
     def __init__(self, key, model_name):
@@ -35,6 +36,17 @@ class Base(ABC):
             response_format="text"
         )
         return transcription.text.strip(), num_tokens_from_string(transcription.text.strip())
+
+    def audio2base64(self, audio):
+        """Return *audio* (bytes or io.BytesIO) as a base64 text string.
+
+        Raises TypeError for any other input type.
+        """
+        if isinstance(audio, bytes):
+            return base64.b64encode(audio).decode("utf-8")
+        if isinstance(audio, io.BytesIO):
+            return base64.b64encode(audio.getvalue()).decode("utf-8")
+        raise TypeError("The input audio file should be in binary format.")
 
 
 class GPTSeq2txt(Base):
@@ -87,3 +99,73 @@ class XinferenceSeq2txt(Base):
     def __init__(self, key, model_name="", base_url=""):
         self.client = OpenAI(api_key="xxx", base_url=base_url)
         self.model_name = model_name
+
+
+class TencentCloudSeq2txt(Base):
+    """Speech-to-text via Tencent Cloud ASR (CreateRecTask + DescribeTaskStatus)."""
+
+    def __init__(
+        self, key, model_name="16k_zh", base_url="https://asr.tencentcloudapi.com"
+    ):
+        """Create the ASR client from the JSON credential string *key*.
+
+        *key* must decode to an object holding ``tencent_cloud_sid`` and
+        ``tencent_cloud_sk``.  *base_url* is accepted but not used here.
+        """
+        from tencentcloud.common import credential
+        from tencentcloud.asr.v20190614 import asr_client
+
+        key = json.loads(key)
+        sid = key.get("tencent_cloud_sid", "")
+        sk = key.get("tencent_cloud_sk", "")
+        cred = credential.Credential(sid, sk)
+        self.client = asr_client.AsrClient(cred, "")
+        self.model_name = model_name
+
+    def transcription(self, audio, max_retries=60, retry_interval=5):
+        """Transcribe *audio* (bytes or io.BytesIO) and return (text, tokens).
+
+        The task status is polled up to *max_retries* times, sleeping
+        *retry_interval* seconds after each unfinished poll.  Errors raised
+        while talking to the service are not propagated; they come back as an
+        ``"**ERROR**: ..."`` string with 0 tokens.  A non-binary *audio* still
+        raises TypeError from audio2base64().
+        """
+        from tencentcloud.asr.v20190614 import models
+        import time
+
+        b64 = self.audio2base64(audio)
+        try:
+            # Submit the recognition task with the audio inlined as base64.
+            req = models.CreateRecTaskRequest()
+            params = {
+                "EngineModelType": self.model_name,
+                "ChannelNum": 1,
+                "ResTextFormat": 0,
+                "SourceType": 1,
+                "Data": b64,
+            }
+            req.from_json_string(json.dumps(params))
+            resp = self.client.CreateRecTask(req)
+
+            # Poll the task until it succeeds, fails, or the budget runs out.
+            req = models.DescribeTaskStatusRequest()
+            params = {"TaskId": resp.Data.TaskId}
+            req.from_json_string(json.dumps(params))
+            for _ in range(max_retries):
+                resp = self.client.DescribeTaskStatus(req)
+                if resp.Data.StatusStr == "success":
+                    # Strip the bracketed "[m:s.ms,m:s.ms]" time ranges.
+                    text = re.sub(
+                        r"\[\d+:\d+\.\d+,\d+:\d+\.\d+\]\s*", "", resp.Data.Result
+                    ).strip()
+                    return text, num_tokens_from_string(text)
+                if resp.Data.StatusStr == "failed":
+                    return (
+                        "**ERROR**: Failed to retrieve speech recognition results.",
+                        0,
+                    )
+                time.sleep(retry_interval)
+            return "**ERROR**: Max retries exceeded. Task may still be processing.", 0
+        except Exception as e:
+            # The SDK's TencentCloudSDKException derives from Exception, so
+            # one handler reports SDK errors and any other failure alike.
+            return "**ERROR**: " + str(e), 0
diff --git a/web/src/assets/svg/llm/tencent-cloud.svg b/web/src/assets/svg/llm/tencent-cloud.svg
new file mode 100644
index 00000000..b33a9701
--- /dev/null
+++ b/web/src/assets/svg/llm/tencent-cloud.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index 538dae4a..5acc44c3 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -506,6 +506,7 @@ The above is the content you need to summarize.`,
       vision: 'Does it support Vision?',
       ollamaLink: 'How to integrate {{name}}',
       FishAudioLink: 'How to use FishAudio',
+      TencentCloudLink: 'How to use TencentCloud ASR',
       volcModelNameMessage: 'Please input your model name!',
       addEndpointID: 'EndpointID of the model',
       endpointIDMessage: 'Please input your EndpointID of the model',
@@ -529,6 +530,10 @@ The above is the content you need to summarize.`,
       HunyuanSIDMessage: 'Please input your Secret ID',
       addHunyuanSK: 'Hunyuan Secret Key',
       HunyuanSKMessage: 'Please input your Secret Key',
+      addTencentCloudSID: 'TencentCloud Secret ID',
+      TencentCloudSIDMessage: 'Please input your Secret ID',
+      addTencentCloudSK: 'TencentCloud Secret Key',
+      TencentCloudSKMessage: 'Please input your Secret Key',
       SparkModelNameMessage: 'Please select Spark model',
       addSparkAPIPassword: 'Spark APIPassword',
       SparkAPIPasswordMessage: 'please input your APIPassword',
diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts
index 62777e66..9a6850b8 100644
--- a/web/src/locales/zh-traditional.ts
+++ b/web/src/locales/zh-traditional.ts
@@ -468,6 +468,7 @@ export default {
       baseUrlNameMessage: '請輸入基礎 Url!',
       ollamaLink: '如何集成 {{name}}',
       FishAudioLink: '如何使用Fish Audio',
+      TencentCloudLink: '如何使用騰訊雲語音識別',
       volcModelNameMessage: '請輸入模型名稱!',
       addEndpointID: '模型 EndpointID',
       endpointIDMessage:
'請輸入模型對應的EndpointID', @@ -491,6 +492,10 @@ export default { HunyuanSIDMessage: '請輸入 Secret ID', addHunyuanSK: '混元 Secret Key', HunyuanSKMessage: '請輸入 Secret Key', + addTencentCloudSID: '騰訊雲 Secret ID', + TencentCloudSIDMessage: '請輸入 Secret ID', + addTencentCloudSK: '騰訊雲 Secret Key', + TencentCloudSKMessage: '請輸入 Secret Key', SparkModelNameMessage: '請選擇星火模型!', addSparkAPIPassword: '星火 APIPassword', SparkAPIPasswordMessage: '請輸入 APIPassword', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 2275488b..814acd58 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -485,6 +485,7 @@ export default { baseUrlNameMessage: '请输入基础 Url!', ollamaLink: '如何集成 {{name}}', FishAudioLink: '如何使用Fish Audio', + TencentCloudLink: '如何使用腾讯云语音识别', volcModelNameMessage: '请输入模型名称!', addEndpointID: '模型 EndpointID', endpointIDMessage: '请输入模型对应的EndpointID', @@ -508,6 +509,10 @@ export default { HunyuanSIDMessage: '请输入 Secret ID', addHunyuanSK: '混元 Secret Key', HunyuanSKMessage: '请输入 Secret Key', + addTencentCloudSID: '腾讯云 Secret ID', + TencentCloudSIDMessage: '请输入 Secret ID', + addTencentCloudSK: '腾讯云 Secret Key', + TencentCloudSKMessage: '请输入 Secret Key', SparkModelNameMessage: '请选择星火模型!', addSparkAPIPassword: '星火 APIPassword', SparkAPIPasswordMessage: '请输入 APIPassword', diff --git a/web/src/pages/user-setting/setting-model/Tencent-modal/index.tsx b/web/src/pages/user-setting/setting-model/Tencent-modal/index.tsx new file mode 100644 index 00000000..b278e8b8 --- /dev/null +++ b/web/src/pages/user-setting/setting-model/Tencent-modal/index.tsx @@ -0,0 +1,128 @@ +import { useTranslate } from '@/hooks/common-hooks'; +import { IModalProps } from '@/interfaces/common'; +import { IAddLlmRequestBody } from '@/interfaces/request/llm'; +import { Flex, Form, Input, Modal, Select, Space } from 'antd'; +import omit from 'lodash/omit'; + +type FieldType = IAddLlmRequestBody & { + tencent_cloud_sid: string; + tencent_cloud_sk: string; +}; + +const { Option } = Select; + +const 
TencentCloudModal = ({ + visible, + hideModal, + onOk, + loading, + llmFactory, +}: IModalProps & { llmFactory: string }) => { + const [form] = Form.useForm(); + + const { t } = useTranslate('setting'); + + const handleOk = async () => { + const values = await form.validateFields(); + const modelType = values.model_type; + + const data = { + ...omit(values), + model_type: modelType, + llm_factory: llmFactory, + }; + + onOk?.(data); + }; + + return ( + { + return ( + + + {t('TencentCloudLink')} + + {originNode} + + ); + }} + confirmLoading={loading} + > +
+ + label={t('modelType')} + name="model_type" + initialValue={'speech2text'} + rules={[{ required: true, message: t('modelTypeMessage') }]} + > + + + + label={t('modelName')} + name="llm_name" + initialValue={'16k_zh'} + rules={[{ required: true, message: t('SparkModelNameMessage') }]} + > + + + + label={t('addTencentCloudSID')} + name="tencent_cloud_sid" + rules={[{ required: true, message: t('TencentCloudSIDMessage') }]} + > + + + + label={t('addTencentCloudSK')} + name="tencent_cloud_sk" + rules={[{ required: true, message: t('TencentCloudSKMessage') }]} + > + + + +
+ ); +}; + +export default TencentCloudModal; diff --git a/web/src/pages/user-setting/setting-model/constant.ts b/web/src/pages/user-setting/setting-model/constant.ts index 4cb4ed9b..dae68f74 100644 --- a/web/src/pages/user-setting/setting-model/constant.ts +++ b/web/src/pages/user-setting/setting-model/constant.ts @@ -36,6 +36,7 @@ export const IconMap = { 'XunFei Spark': 'spark', BaiduYiyan: 'yiyan', 'Fish Audio': 'fish-audio', + 'Tencent Cloud': 'tencent-cloud', }; export const BedrockRegionList = [ diff --git a/web/src/pages/user-setting/setting-model/fish-audio-modal/index.tsx b/web/src/pages/user-setting/setting-model/fish-audio-modal/index.tsx index af82f912..a75765d9 100644 --- a/web/src/pages/user-setting/setting-model/fish-audio-modal/index.tsx +++ b/web/src/pages/user-setting/setting-model/fish-audio-modal/index.tsx @@ -81,14 +81,14 @@ const FishAudioModal = ({ label={t('addFishAudioAK')} - name="FishAudio_ak" + name="fish_audio_ak" rules={[{ required: true, message: t('FishAudioAKMessage') }]} > label={t('addFishAudioRefID')} - name="FishAudio_refid" + name="fish_audio_refid" rules={[{ required: false, message: t('FishAudioRefIDMessage') }]} > diff --git a/web/src/pages/user-setting/setting-model/hooks.ts b/web/src/pages/user-setting/setting-model/hooks.ts index 68391fc4..1c60bbce 100644 --- a/web/src/pages/user-setting/setting-model/hooks.ts +++ b/web/src/pages/user-setting/setting-model/hooks.ts @@ -190,6 +190,33 @@ export const useSubmitHunyuan = () => { }; }; +export const useSubmitTencentCloud = () => { + const { addLlm, loading } = useAddLlm(); + const { + visible: TencentCloudAddingVisible, + hideModal: hideTencentCloudAddingModal, + showModal: showTencentCloudAddingModal, + } = useSetModalState(); + + const onTencentCloudAddingOk = useCallback( + async (payload: IAddLlmRequestBody) => { + const ret = await addLlm(payload); + if (ret === 0) { + hideTencentCloudAddingModal(); + } + }, + [hideTencentCloudAddingModal, addLlm], + ); + + return { + 
TencentCloudAddingLoading: loading, + onTencentCloudAddingOk, + TencentCloudAddingVisible, + hideTencentCloudAddingModal, + showTencentCloudAddingModal, + }; +}; + export const useSubmitSpark = () => { const { addLlm, loading } = useAddLlm(); const { diff --git a/web/src/pages/user-setting/setting-model/index.tsx b/web/src/pages/user-setting/setting-model/index.tsx index 809c8b79..45455df4 100644 --- a/web/src/pages/user-setting/setting-model/index.tsx +++ b/web/src/pages/user-setting/setting-model/index.tsx @@ -27,6 +27,7 @@ import { import { useCallback, useMemo } from 'react'; import SettingTitle from '../components/setting-title'; import { isLocalLlmFactory } from '../utils'; +import TencentCloudModal from './Tencent-modal'; import ApiKeyModal from './api-key-modal'; import BedrockModal from './bedrock-modal'; import { IconMap } from './constant'; @@ -40,6 +41,7 @@ import { useSubmitOllama, useSubmitSpark, useSubmitSystemModelSetting, + useSubmitTencentCloud, useSubmitVolcEngine, useSubmityiyan, } from './hooks'; @@ -101,7 +103,8 @@ const ModelCard = ({ item, clickApiKey }: IModelCardProps) => { item.name === 'Tencent Hunyuan' || item.name === 'XunFei Spark' || item.name === 'BaiduYiyan' || - item.name === 'Fish Audio' + item.name === 'Fish Audio' || + item.name === 'Tencent Cloud' ? 
t('addTheModel') : 'API-Key'} @@ -183,6 +186,14 @@ const UserSettingModel = () => { HunyuanAddingLoading, } = useSubmitHunyuan(); + const { + TencentCloudAddingVisible, + hideTencentCloudAddingModal, + showTencentCloudAddingModal, + onTencentCloudAddingOk, + TencentCloudAddingLoading, + } = useSubmitTencentCloud(); + const { SparkAddingVisible, hideSparkAddingModal, @@ -223,11 +234,13 @@ const UserSettingModel = () => { 'XunFei Spark': showSparkAddingModal, BaiduYiyan: showyiyanAddingModal, 'Fish Audio': showFishAudioAddingModal, + 'Tencent Cloud': showTencentCloudAddingModal, }), [ showBedrockAddingModal, showVolcAddingModal, showHunyuanAddingModal, + showTencentCloudAddingModal, showSparkAddingModal, showyiyanAddingModal, showFishAudioAddingModal, @@ -349,6 +362,13 @@ const UserSettingModel = () => { loading={HunyuanAddingLoading} llmFactory={'Tencent Hunyuan'} > +