diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 00000000..e9a37fcc --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,124 @@ +name: release + +on: + schedule: + - cron: '0 13 * * *' # This schedule runs every 13:00:00Z(21:00:00+08:00) + # The "create tags" trigger is specifically focused on the creation of new tags, while the "push tags" trigger is activated when tags are pushed, including both new tag creations and updates to existing tags. + create: # NOTE(review): the create event is not documented to support a tags filter; confirm these patterns are honored + tags: + - "v*.*.*" # normal release + - "nightly" # the only one mutable tag + +# https://docs.github.com/en/actions/using-jobs/using-concurrency +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + release: + runs-on: [ "ubuntu-latest" ] + steps: + - name: Ensure workspace ownership + run: echo "chown -R $USER $GITHUB_WORKSPACE" && sudo chown -R $USER $GITHUB_WORKSPACE + + # https://github.com/actions/checkout/blob/v3/README.md + - name: Check out code + uses: actions/checkout@v4 + with: + ssh-key: ${{ secrets.MY_DEPLOY_KEY }} + + - name: Prepare release body + run: | + if [[ $GITHUB_EVENT_NAME == 'create' ]]; then + RELEASE_TAG=${GITHUB_REF#refs/tags/} + if [[ $RELEASE_TAG == 'nightly' ]]; then + PRERELEASE=true + else + PRERELEASE=false + fi + echo "Workflow triggered by create tag: $RELEASE_TAG" + else + RELEASE_TAG=nightly + PRERELEASE=true + echo "Workflow triggered by schedule" + fi + echo "RELEASE_TAG=$RELEASE_TAG" >> $GITHUB_ENV + echo "PRERELEASE=$PRERELEASE" >> $GITHUB_ENV + RELEASE_DATETIME=$(date --rfc-3339=seconds) + cat <<EOF > release_template.md + Release $RELEASE_TAG created from $GITHUB_SHA at $RELEASE_DATETIME + EOF + envsubst < release_template.md > release_body.md + + - name: Move the existing mutable tag + # https://github.com/softprops/action-gh-release/issues/171 + run: | + if [[ $GITHUB_EVENT_NAME == 'schedule' ]]; then + # Determine if a given tag 
exists and matches a specific Git commit. + # actions/checkout@v4 fetch-tags doesn't work when triggered by schedule + git fetch --tags + if [ "$(git rev-parse -q --verify "refs/tags/$RELEASE_TAG")" = "$GITHUB_SHA" ]; then + echo "mutable tag $RELEASE_TAG exists and matches $GITHUB_SHA" + else + git tag -f $RELEASE_TAG $GITHUB_SHA + git push -f origin $RELEASE_TAG:refs/tags/$RELEASE_TAG + echo "created/moved mutable tag $RELEASE_TAG to $GITHUB_SHA" + fi + fi + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # https://github.com/marketplace/actions/docker-login + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: infiniflow + password: ${{ secrets.DOCKERHUB_TOKEN }} + + # https://github.com/marketplace/actions/build-and-push-docker-images + - name: Build and push full image + uses: docker/build-push-action@v6 + with: + context: . + push: true + tags: | # action inputs must be scalars, so list one tag per line in a block scalar + infiniflow/ragflow:${{ env.RELEASE_TAG }} + file: Dockerfile + platforms: | # newline-separated list, for the same reason + linux/amd64 + linux/arm64 + + # https://github.com/marketplace/actions/build-and-push-docker-images + - name: Build and push slim image + uses: docker/build-push-action@v6 + with: + context: . 
+ push: true + tags: | # action inputs must be scalars, so list one tag per line in a block scalar + infiniflow/ragflow:${{ env.RELEASE_TAG }}-slim + file: Dockerfile + build-args: | # one KEY=VALUE per line + LIGHTEN=1 + platforms: | # newline-separated list, for the same reason + linux/amd64 + linux/arm64 + + - name: Build ragflow-sdk + if: startsWith(github.ref, 'refs/tags/v') + run: | + sudo apt-get update && sudo apt-get install -y pipx && \ + pipx install poetry && \ + cd sdk/python && \ + poetry build + + - name: Publish package distributions to PyPI + if: startsWith(github.ref, 'refs/tags/v') + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: sdk/python/dist/ # the build step above runs poetry build inside sdk/python + password: ${{ secrets.PYPI_API_TOKEN }} + verbose: true diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index c7d53763..4e467fda 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -52,9 +52,8 @@ jobs: - name: Build ragflow:dev-slim run: | RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME} - cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co ${RUNNER_WORKSPACE_PREFIX}/nltk_data ${RUNNER_WORKSPACE_PREFIX}/libssl*.deb ${RUNNER_WORKSPACE_PREFIX}/tika-server*.jar* ${RUNNER_WORKSPACE_PREFIX}/chrome* ${RUNNER_WORKSPACE_PREFIX}/cl100k_base.tiktoken . sudo docker pull ubuntu:22.04 - sudo docker build --progress=plain -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . + sudo docker build --progress=plain --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim . 
- name: Build ragflow:dev run: | diff --git a/Dockerfile b/Dockerfile index 985eb061..002955f6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,38 +3,58 @@ FROM ubuntu:22.04 AS base USER root SHELL ["/bin/bash", "-c"] -ENV LIGHTEN=0 +ARG LIGHTEN=0 +ENV LIGHTEN=${LIGHTEN} WORKDIR /ragflow -RUN rm -f /etc/apt/apt.conf.d/docker-clean \ - && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache - -RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \ - apt update && apt-get --no-install-recommends install -y ca-certificates - -# Setup apt mirror site -RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list - -RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \ - apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \ - libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git \ - && rm -rf /var/lib/apt/lists/* - -RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \ - && pipx install poetry \ - && /root/.local/bin/poetry self add poetry-plugin-pypi-mirror - -# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13 -# aspose-slides on linux/arm64 is unavailable -RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \ - --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_arm64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb \ - if [ "$(uname -m)" = "x86_64" ]; then \ - dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \ - elif [ "$(uname -m)" = "aarch64" ]; then \ - dpkg -i 
/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \ +# Copy models downloaded via download_deps.py +RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ + tar --exclude='.*' -cf - \ + /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \ + /huggingface.co/InfiniFlow/deepdoc \ + | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \ + if [ "$LIGHTEN" == "0" ]; then \ + (tar -cf - \ + /huggingface.co/BAAI/bge-large-zh-v1.5 \ + /huggingface.co/BAAI/bge-reranker-v2-m3 \ + /huggingface.co/maidalun1020/bce-embedding-base_v1 \ + /huggingface.co/maidalun1020/bce-reranker-base_v1 \ + | tar -xf - --strip-components=2 -C /root/.ragflow) \ fi +# https://github.com/chrismattmann/tika-python +# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache. 
+RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ + cp -r /deps/nltk_data /root/ && \ + cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \ + cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 + +ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar" + +# Setup apt +RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ + sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list && \ + rm -f /etc/apt/apt.conf.d/docker-clean && \ + echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \ + apt update && apt --no-install-recommends install -y ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +# cv2 requires libGL.so.1 +RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ + apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \ + libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git nginx libgl1 vim less && \ + rm -rf /var/lib/apt/lists/* + +RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ + pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \ + pipx install poetry && \ + pipx runpip poetry config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ + pipx runpip poetry config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \ + /root/.local/bin/poetry self add poetry-plugin-pypi-mirror + ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 ENV PATH=/root/.local/bin:$PATH # Configure Poetry @@ -45,7 +65,7 @@ ENV POETRY_REQUESTS_TIMEOUT=15 ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/ # nodejs 12.22 on Ubuntu 22.04 is too old -RUN 
--mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \ +RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \ curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ apt purge -y nodejs npm && \ apt autoremove && \ @@ -53,6 +73,26 @@ RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked apt install -y nodejs cargo && \ rm -rf /var/lib/apt/lists/* +# Add dependencies of selenium +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \ + unzip /chrome-linux64.zip && \ + mv chrome-linux64 /opt/chrome && \ + ln -s /opt/chrome/chrome /usr/local/bin/ +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \ + unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \ + mv chromedriver /usr/local/bin/ && \ + rm -f /usr/bin/google-chrome + +# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13 +# aspose-slides on linux/arm64 is unavailable +RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \ + if [ "$(uname -m)" = "x86_64" ]; then \ + dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \ + elif [ "$(uname -m)" = "aarch64" ]; then \ + dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \ + fi + + # builder stage FROM base AS builder USER root @@ -62,7 +102,7 @@ WORKDIR /ragflow # install dependencies from poetry.lock file COPY pyproject.toml poetry.toml poetry.lock ./ -RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \ +RUN --mount=type=cache,id=ragflow_poetry,target=/root/.cache/pypoetry,sharing=locked \ if [ "$LIGHTEN" == "1" ]; then \ poetry install --no-root; \ else \ @@ -71,20 +111,12 @@ RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sh COPY web web COPY docs docs -RUN 
--mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \ +RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \ cd web && npm install --force && npm run build COPY .git /ragflow/.git -RUN current_commit=$(git rev-parse --short HEAD); \ - last_tag=$(git describe --tags --abbrev=0); \ - commit_count=$(git rev-list --count "$last_tag..HEAD"); \ - version_info=""; \ - if [ "$commit_count" -eq 0 ]; then \ - version_info=$last_tag; \ - else \ - version_info="$current_commit($last_tag~$commit_count)"; \ - fi; \ +RUN version_info=$(git describe --tags --match=v* --dirty); \ if [ "$LIGHTEN" == "1" ]; then \ version_info="$version_info slim"; \ else \ @@ -104,49 +136,6 @@ ENV VIRTUAL_ENV=/ragflow/.venv COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" -# Install python packages' dependencies -# cv2 requires libGL.so.1 -RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \ - apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \ - rm -rf /var/lib/apt/lists/* - -# Copy models downloaded via download_deps.py -RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow -RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \ - tar --exclude='.*' -cf - \ - /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \ - /huggingface.co/InfiniFlow/deepdoc \ - | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc -RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \ - tar -cf - \ - /huggingface.co/BAAI/bge-large-zh-v1.5 \ - /huggingface.co/BAAI/bge-reranker-v2-m3 \ - /huggingface.co/maidalun1020/bce-embedding-base_v1 \ - /huggingface.co/maidalun1020/bce-reranker-base_v1 \ - | tar -xf - --strip-components=2 -C /root/.ragflow - -# Copy nltk data downloaded via download_deps.py -COPY nltk_data /root/nltk_data - -# https://github.com/chrismattmann/tika-python -# This is the only way to run python-tika without internet access. 
Without this set, the default is to check the tika version and pull latest every time from Apache. -COPY tika-server-standard-3.0.0.jar /ragflow/tika-server-standard.jar -COPY tika-server-standard-3.0.0.jar.md5 /ragflow/tika-server-standard.jar.md5 -ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard.jar" - -# Copy cl100k_base -COPY cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 - -# Add dependencies of selenium -RUN --mount=type=bind,source=chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \ - unzip /chrome-linux64.zip && \ - mv chrome-linux64 /opt/chrome && \ - ln -s /opt/chrome/chrome /usr/local/bin/ -RUN --mount=type=bind,source=chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \ - unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \ - mv chromedriver /usr/local/bin/ && \ - rm -f /usr/bin/google-chrome - ENV PYTHONPATH=/ragflow/ COPY web web diff --git a/Dockerfile.deps b/Dockerfile.deps new file mode 100644 index 00000000..438ab5ce --- /dev/null +++ b/Dockerfile.deps @@ -0,0 +1,10 @@ +# This builds an image that contains the resources needed by Dockerfile +# +FROM ubuntu:22.04 + +# Copy resources downloaded via download_deps.py +COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 libssl*.deb / + +COPY nltk_data /nltk_data + +COPY huggingface.co /huggingface.co diff --git a/Dockerfile.slim b/Dockerfile.slim deleted file mode 100644 index 17da40f6..00000000 --- a/Dockerfile.slim +++ /dev/null @@ -1,163 +0,0 @@ -# base stage -FROM ubuntu:22.04 AS base -USER root -SHELL ["/bin/bash", "-c"] - -ENV LIGHTEN=1 - -WORKDIR /ragflow - -RUN rm -f /etc/apt/apt.conf.d/docker-clean \ - && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache - -RUN 
--mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \ - apt update && apt-get --no-install-recommends install -y ca-certificates - -# Setup apt mirror site -RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list - -RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \ - apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \ - libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git \ - && rm -rf /var/lib/apt/lists/* - -RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \ - && pipx install poetry \ - && /root/.local/bin/poetry self add poetry-plugin-pypi-mirror - -# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13 -# aspose-slides on linux/arm64 is unavailable -RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \ - --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_arm64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb \ - if [ "$(uname -m)" = "x86_64" ]; then \ - dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \ - elif [ "$(uname -m)" = "aarch64" ]; then \ - dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \ - fi - -ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1 -ENV PATH=/root/.local/bin:$PATH -# Configure Poetry -ENV POETRY_NO_INTERACTION=1 -ENV POETRY_VIRTUALENVS_IN_PROJECT=true -ENV POETRY_VIRTUALENVS_CREATE=true -ENV POETRY_REQUESTS_TIMEOUT=15 -ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/ - -# nodejs 12.22 on Ubuntu 22.04 is too old -RUN 
--mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \ - curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \ - apt purge -y nodejs npm && \ - apt autoremove && \ - apt update && \ - apt install -y nodejs cargo && \ - rm -rf /var/lib/apt/lists/* - -# builder stage -FROM base AS builder -USER root - -WORKDIR /ragflow - -COPY .git /ragflow/.git - -RUN current_commit=$(git rev-parse --short HEAD); \ - last_tag=$(git describe --tags --abbrev=0); \ - commit_count=$(git rev-list --count "$last_tag..HEAD"); \ - version_info=""; \ - if [ "$commit_count" -eq 0 ]; then \ - version_info=$last_tag; \ - else \ - version_info="$current_commit($last_tag~$commit_count)"; \ - fi; \ - if [ "$LIGHTEN" == "1" ]; then \ - version_info="$version_info slim"; \ - else \ - version_info="$version_info full"; \ - fi; \ - echo "RAGFlow version: $version_info"; \ - echo $version_info > /ragflow/VERSION - -COPY web web -COPY docs docs -RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \ - cd web && npm install --force && npm run build - -# install dependencies from poetry.lock file -COPY pyproject.toml poetry.toml poetry.lock ./ - -RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \ - if [ "$LIGHTEN" == "1" ]; then \ - poetry install --no-root; \ - else \ - poetry install --no-root --with=full; \ - fi - -# production stage -FROM base AS production -USER root - -WORKDIR /ragflow - -COPY --from=builder /ragflow/VERSION /ragflow/VERSION - -# Install python packages' dependencies -# cv2 requires libGL.so.1 -RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \ - apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \ - rm -rf /var/lib/apt/lists/* - -COPY web web -COPY api api -COPY conf conf -COPY deepdoc deepdoc -COPY rag rag -COPY agent agent -COPY graphrag graphrag -COPY pyproject.toml poetry.toml poetry.lock ./ - -# Copy models 
downloaded via download_deps.py -RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow -RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \ - tar --exclude='.*' -cf - \ - /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \ - /huggingface.co/InfiniFlow/deepdoc \ - | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc - -# Copy nltk data downloaded via download_deps.py -COPY nltk_data /root/nltk_data - -# https://github.com/chrismattmann/tika-python -# This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache. -COPY tika-server-standard-3.0.0.jar /ragflow/tika-server-standard.jar -COPY tika-server-standard-3.0.0.jar.md5 /ragflow/tika-server-standard.jar.md5 -ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard.jar" - -# Copy cl100k_base -COPY cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 - -# Add dependencies of selenium -RUN --mount=type=bind,source=chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \ - unzip /chrome-linux64.zip && \ - mv chrome-linux64 /opt/chrome && \ - ln -s /opt/chrome/chrome /usr/local/bin/ -RUN --mount=type=bind,source=chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \ - unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \ - mv chromedriver /usr/local/bin/ && \ - rm -f /usr/bin/google-chrome - -# Copy compiled web pages -COPY --from=builder /ragflow/web/dist /ragflow/web/dist - -# Copy Python environment and packages -ENV VIRTUAL_ENV=/ragflow/.venv -COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} -ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" - -ENV PYTHONPATH=/ragflow/ - -COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template -COPY docker/entrypoint.sh ./entrypoint.sh -RUN chmod +x ./entrypoint.sh - -ENTRYPOINT ["./entrypoint.sh"] diff --git a/README.md b/README.md index 7193cac4..d65f8fa2 100644 --- a/README.md +++ b/README.md @@ 
-272,9 +272,7 @@ This image is approximately 1 GB in size and relies on external LLM and embeddin ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py -docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . +docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim . ``` ## 🔧 Build a Docker image including embedding models @@ -284,8 +282,6 @@ This image is approximately 9 GB in size. As it includes embedding models, it re ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py docker build -f Dockerfile -t infiniflow/ragflow:dev . ``` diff --git a/README_id.md b/README_id.md index f382ca1f..b12315e4 100644 --- a/README_id.md +++ b/README_id.md @@ -247,9 +247,7 @@ Image ini berukuran sekitar 1 GB dan bergantung pada aplikasi LLM eksternal dan ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py -docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . +docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim . ``` ## 🔧 Membangun Docker Image Termasuk Model Embedding @@ -259,8 +257,6 @@ Image ini berukuran sekitar 9 GB. Karena sudah termasuk model embedding, ia hany ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py docker build -f Dockerfile -t infiniflow/ragflow:dev . ``` diff --git a/README_ja.md b/README_ja.md index 6df951d0..8fcda985 100644 --- a/README_ja.md +++ b/README_ja.md @@ -228,9 +228,7 @@ RAGFlow はデフォルトで Elasticsearch を使用して全文とベクトル ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py -docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . 
+docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim . ``` ## 🔧 ソースコードをコンパイルしたDockerイメージ(埋め込みモデルを含む) @@ -240,8 +238,6 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py docker build -f Dockerfile -t infiniflow/ragflow:dev . ``` diff --git a/README_ko.md b/README_ko.md index fd46dcc3..ec33c78d 100644 --- a/README_ko.md +++ b/README_ko.md @@ -230,9 +230,7 @@ RAGFlow 는 기본적으로 Elasticsearch 를 사용하여 전체 텍스트 및 ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py -docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . +docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim . ``` ## 🔧 소스 코드로 Docker 이미지를 컴파일합니다(임베딩 모델 포함) @@ -242,8 +240,6 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py docker build -f Dockerfile -t infiniflow/ragflow:dev . ``` diff --git a/README_zh.md b/README_zh.md index b9d85952..c03e05ed 100644 --- a/README_zh.md +++ b/README_zh.md @@ -235,9 +235,7 @@ RAGFlow 默认使用 Elasticsearch 存储文本和向量数据. 如果要切换 ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py -docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . +docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim . ``` ## 🔧 源码编译 Docker 镜像(包含 embedding 模型) @@ -247,8 +245,6 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py docker build -f Dockerfile -t infiniflow/ragflow:dev . 
``` diff --git a/api/versions.py b/api/versions.py index 46f6faf2..99093492 100644 --- a/api/versions.py +++ b/api/versions.py @@ -42,28 +42,11 @@ def get_ragflow_version() -> str: def get_closest_tag_and_count(): try: # Get the current commit hash - commit_id = ( - subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]) + version_info = ( + subprocess.check_output(["git", "describe", "--tags", "--match=v*", "--dirty"]) .strip() .decode("utf-8") ) - # Get the closest tag - closest_tag = ( - subprocess.check_output(["git", "describe", "--tags", "--abbrev=0"]) - .strip() - .decode("utf-8") - ) - # Get the commit count since the closest tag - process = subprocess.Popen( - ["git", "rev-list", "--count", f"{closest_tag}..HEAD"], - stdout=subprocess.PIPE, - ) - commits_count, _ = process.communicate() - commits_count = int(commits_count.strip()) - - if commits_count == 0: - return closest_tag - else: - return f"{commit_id}({closest_tag}~{commits_count})" + return version_info except Exception: return "unknown" diff --git a/docker/.env b/docker/.env index 19d2cf4d..7a8eb62d 100644 --- a/docker/.env +++ b/docker/.env @@ -81,7 +81,7 @@ SVR_HTTP_PORT=9380 # The RAGFlow Docker image to download. # Defaults to the dev-slim edition, which is the RAGFlow Docker image without embedding models. 
-RAGFLOW_IMAGE=infiniflow/ragflow:dev-slim +RAGFLOW_IMAGE=infiniflow/ragflow:dev # # To download the RAGFlow Docker image with embedding models, uncomment the following line instead: # RAGFLOW_IMAGE=infiniflow/ragflow:dev diff --git a/docs/guides/develop/build_docker_image.mdx b/docs/guides/develop/build_docker_image.mdx index 07bc7b07..3cfc1666 100644 --- a/docs/guides/develop/build_docker_image.mdx +++ b/docs/guides/develop/build_docker_image.mdx @@ -40,9 +40,7 @@ While we also test RAGFlow on ARM64 platforms, we do not plan to maintain RAGFlo ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py -docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim . +docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim . ``` @@ -58,8 +56,6 @@ While we also test RAGFlow on ARM64 platforms, we do not plan to maintain RAGFlo ```bash git clone https://github.com/infiniflow/ragflow.git cd ragflow/ -pip3 install huggingface-hub nltk -python3 download_deps.py docker build -f Dockerfile -t infiniflow/ragflow:dev . 
``` diff --git a/download_deps.py b/download_deps.py index 36d83e60..c859007c 100644 --- a/download_deps.py +++ b/download_deps.py @@ -1,4 +1,8 @@ #!/usr/bin/env python3 +# +# Install this script's dependencies with pip3: +# pip3 install huggingface-hub nltk + from huggingface_hub import snapshot_download import nltk diff --git a/ubuntu.sources b/ubuntu.sources deleted file mode 100644 index c4a9002d..00000000 --- a/ubuntu.sources +++ /dev/null @@ -1,39 +0,0 @@ -Types: deb -URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu -Suites: noble noble-updates noble-backports -Components: main restricted universe multiverse -Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg - -# 默认注释了源码镜像以提高 apt update 速度,如有需要可自行取消注释 -# Types: deb-src -# URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu -# Suites: noble noble-updates noble-backports -# Components: main restricted universe multiverse -# Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg - -# 以下安全更新软件源包含了官方源与镜像站配置,如有需要可自行修改注释切换 -Types: deb -URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu -Suites: noble-security -Components: main restricted universe multiverse -Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg - -# Types: deb-src -# URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu -# Suites: noble-security -# Components: main restricted universe multiverse -# Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg - -# 预发布软件源,不建议启用 - -# Types: deb -# URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu -# Suites: noble-proposed -# Components: main restricted universe multiverse -# Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg - -# # Types: deb-src -# # URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu -# # Suites: noble-proposed -# # Components: main restricted universe multiverse -# # Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg