deepakpant committed on
Commit
8647366
·
1 Parent(s): 04785dd

Complete agentic code

Browse files
Files changed (49) hide show
  1. .devcontainer/devcontainer.json +1 -1
  2. .dockerignore +71 -0
  3. .github/workflows/build-and-deploy.yml +0 -55
  4. .github/workflows/deploy-docs.yml +17 -17
  5. .github/workflows/hugging_face-deploy.yml +27 -0
  6. .github/workflows/publish-package.yml +0 -41
  7. .github/workflows/test-check-build.yml +1 -32
  8. .github/workflows/version-bump-and-release.yml +0 -2
  9. .pre-commit-config.yaml +9 -9
  10. .vscode/launch.json +2 -1
  11. Dockerfile +23 -7
  12. LICENSE +0 -1
  13. Makefile +5 -1
  14. README.md +12 -2
  15. artifacts/processed/.gitkeep +0 -0
  16. docker-compose.yml +13 -19
  17. docs/index.md +3 -2
  18. docs/modules.md +0 -32
  19. jio_savan_music_downloader/__main__.py +30 -0
  20. jio_savan_music_downloader/api/__init__.py +0 -15
  21. jio_savan_music_downloader/api/endpoint.py +0 -31
  22. jio_savan_music_downloader/app.py +53 -0
  23. jio_savan_music_downloader/config/__init__.py +0 -13
  24. jio_savan_music_downloader/config/agents.yaml +9 -0
  25. jio_savan_music_downloader/config/config.py +0 -0
  26. jio_savan_music_downloader/config/tasks.yaml +7 -0
  27. jio_savan_music_downloader/constants/__init__.py +0 -18
  28. jio_savan_music_downloader/core/__init__.py +0 -12
  29. jio_savan_music_downloader/core/embedding_service.py +0 -0
  30. jio_savan_music_downloader/core/llm_service.py +0 -0
  31. jio_savan_music_downloader/core/processor.py +0 -0
  32. jio_savan_music_downloader/crew.py +53 -0
  33. jio_savan_music_downloader/entity/__init__.py +0 -16
  34. jio_savan_music_downloader/exception/__init__.py +0 -61
  35. jio_savan_music_downloader/logger/__init__.py +0 -61
  36. jio_savan_music_downloader/main.py +0 -92
  37. jio_savan_music_downloader/models/request_models.py +0 -0
  38. jio_savan_music_downloader/models/response_models.py +16 -0
  39. jio_savan_music_downloader/services/app_service.py +41 -0
  40. jio_savan_music_downloader/services/document_service.py +0 -0
  41. jio_savan_music_downloader/services/vector_db_service.py +0 -0
  42. jio_savan_music_downloader/services/web_scarapper_service.py +231 -0
  43. artifacts/documents/.gitkeep → jio_savan_music_downloader/tools/__init__.py +0 -0
  44. jio_savan_music_downloader/tools/custom_tool.py +9 -0
  45. jio_savan_music_downloader/utils/__init__.py +0 -15
  46. notebooks/trails.ipynb +0 -0
  47. poetry.lock +0 -0
  48. pyproject.toml +24 -40
  49. tests/test_api_endpoint.py +0 -15
.devcontainer/devcontainer.json CHANGED
@@ -47,7 +47,7 @@
47
  "python.linting.pylintEnabled": true
48
  }
49
  }
50
- }//,
51
  // "hostRequirements": {
52
  // "cpus": 2,
53
  // "memory": "4gb",
 
47
  "python.linting.pylintEnabled": true
48
  }
49
  }
50
+ } //,
51
  // "hostRequirements": {
52
  // "cpus": 2,
53
  // "memory": "4gb",
.dockerignore ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python cache files
2
+ **/__pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+
7
+ # Poetry-related files
8
+ .poetry/
9
+
10
+ # Virtual environments
11
+ **/.venv/
12
+ **/venv/
13
+
14
+ # Node.js dependencies
15
+ **/node_modules/
16
+
17
+ # Mypy cache
18
+ **/.mypy_cache/
19
+
20
+ # Pytest cache
21
+ **/.pytest_cache/
22
+
23
+ # Ruff cache
24
+ **/.ruff_cache/
25
+
26
+ # Coverage files
27
+ **/.coverage
28
+ **/coverage.xml
29
+
30
+ # Development tools and IDE-specific files
31
+ *.idea/
32
+ *.vscode/
33
+ **/.vscode/
34
+ .devcontainer/
35
+ .github/
36
+
37
+ # Exclude notebooks
38
+ notebooks/
39
+
40
+ # Exclude test-related files and directories
41
+ tests/
42
+
43
+ # Build and distribution artifacts
44
+ **/build/
45
+ **/dist/
46
+ *.egg-info/
47
+ .eggs/
48
+
49
+ # Logs and temporary files
50
+ *.log
51
+ *.tmp
52
+ *.swp
53
+ *.swo
54
+
55
+ # Site-related files (if applicable)
56
+ **/site/
57
+
58
+ # Sensitive or configuration files
59
+ .env
60
+ *.pem
61
+ *.key
62
+
63
+ # Markdown and configuration files
64
+ mkdocs.yml
65
+ tox.ini
66
+ .pre-commit-config.yaml
67
+ codecove.yml
68
+ .gitignore
69
+
70
+ # Docker-related files (optional if Docker Compose is used locally but not in the image)
71
+ docker-compose.yml
.github/workflows/build-and-deploy.yml DELETED
@@ -1,55 +0,0 @@
1
- name: Build and Deploy Docker Image
2
-
3
- on:
4
- workflow_dispatch: {}
5
- release:
6
- types: [published]
7
- branches: [main]
8
- repository_dispatch:
9
- types: [package-release]
10
-
11
- env:
12
- DOCKER_IMAGE: deepak93p/jio_savan_music_downloader # Replace with your DockerHub image
13
- AZURE_WEBAPP_NAME: jio-savan-music-downloader-app # Replace with your Azure Web App name
14
- AZURE_WEBAPP_RG: jio-savan-music-downloader-rg # Replace with your Azure Web App resource group
15
-
16
- jobs:
17
- build-push-docker-image:
18
- runs-on: ubuntu-latest
19
-
20
- steps:
21
- - name: Checkout repository
22
- uses: actions/checkout@v4
23
-
24
- - name: Login to Docker Hub
25
- env:
26
- DOCKER_USER: deepak93p
27
- DOCKER_PWD: ${{ secrets.DOCKERHUB_PUSH_TOKEN }}
28
- run: echo $DOCKER_PWD | docker login -u $DOCKER_USER --password-stdin
29
-
30
- - name: Build and Push Docker Image
31
- run: make bake-container-and-push IMAGE=${{ env.DOCKER_IMAGE }} TAG=${{ github.sha }}
32
-
33
- - name: Clean up Docker system
34
- run: docker system prune -f
35
-
36
- deploy:
37
- runs-on: ubuntu-latest
38
- needs: build-push-docker-image
39
- environment:
40
- name: "production"
41
-
42
- steps:
43
- - name: Azure Login
44
- uses: azure/login@v1
45
- with:
46
- creds: ${{ secrets.AZURE_CREDENTIALS }}
47
-
48
- - name: Deploy to Azure Container Apps
49
- run: |
50
- az containerapp update \
51
- --name ${{ env.AZURE_WEBAPP_NAME }} \
52
- --resource-group ${{ env.AZURE_WEBAPP_RG }} \
53
- --image index.docker.io/${{ env.DOCKER_IMAGE }}:${{ github.sha }} \
54
- --query "properties.configuration.ingress.fqdn" \
55
- -o tsv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/deploy-docs.yml CHANGED
@@ -1,22 +1,22 @@
1
- name: Deploy Documentation
2
 
3
- on:
4
- release:
5
- types: [published]
6
- branches: [main]
7
- repository_dispatch:
8
- types: [package-release]
9
 
10
- jobs:
11
- deploy-docs:
12
- runs-on: ubuntu-latest
13
 
14
- steps:
15
- - name: Check out
16
- uses: actions/checkout@v4
17
 
18
- - name: Set up the environment
19
- uses: ./.github/actions/setup-poetry-env
20
 
21
- - name: Deploy documentation
22
- run: poetry run mkdocs gh-deploy --force
 
1
+ # name: Deploy Documentation
2
 
3
+ # on:
4
+ # release:
5
+ # types: [published]
6
+ # branches: [main]
7
+ # repository_dispatch:
8
+ # types: [package-release]
9
 
10
+ # jobs:
11
+ # deploy-docs:
12
+ # runs-on: ubuntu-latest
13
 
14
+ # steps:
15
+ # - name: Check out
16
+ # uses: actions/checkout@v4
17
 
18
+ # - name: Set up the environment
19
+ # uses: ./.github/actions/setup-poetry-env
20
 
21
+ # - name: Deploy documentation
22
+ # run: poetry run mkdocs gh-deploy --force
.github/workflows/hugging_face-deploy.yml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy to Hugging Face Spaces
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main # or master, depending on your default branch
7
+
8
+ jobs:
9
+ deploy:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - name: Checkout repository
13
+ uses: actions/checkout@v4
14
+ with:
15
+ fetch-depth: 0
16
+
17
+ - name: Configure Git
18
+ run: |
19
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
20
+ git config --global user.name "github-actions[bot]"
21
+
22
+ - name: Push to Hugging Face Space
23
+ env:
24
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
25
+ run: |
26
+ git remote add space https://USER:$HF_TOKEN@huggingface.co/spaces/deepakpant/jio_savan_music_downloader
27
+ git push --force space main
.github/workflows/publish-package.yml DELETED
@@ -1,41 +0,0 @@
1
- name: Publish Package
2
-
3
- on:
4
- release:
5
- types: [published]
6
- branches: [main]
7
- repository_dispatch:
8
- types: [package-release]
9
-
10
-
11
- jobs:
12
- publish-to-pypi:
13
- runs-on: ubuntu-latest
14
-
15
- steps:
16
- - name: Check out
17
- uses: actions/checkout@v4
18
-
19
- - name: Set up the environment
20
- uses: ./.github/actions/setup-poetry-env
21
-
22
- - name: Export tag
23
- id: vars
24
- run: |
25
- if [ "${{ github.event.client_payload.version }}" != "" ]; then
26
- echo "tag=${{ github.event.client_payload.version }}" >> $GITHUB_OUTPUT
27
- elif [ "${{ github.event.inputs.version }}" != "" ]; then
28
- echo "tag=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT
29
- else
30
- echo "No version provided"
31
- exit 1
32
- fi
33
-
34
- - name: Build and publish
35
- run: |
36
- source .venv/bin/activate
37
- poetry version $RELEASE_VERSION
38
- make bake-and-publish
39
- env:
40
- PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
41
- RELEASE_VERSION: ${{ steps.vars.outputs.tag }}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/test-check-build.yml CHANGED
@@ -25,40 +25,9 @@ jobs:
25
  - name: Run checks
26
  run: make lint
27
 
28
- tests:
29
- runs-on: ubuntu-latest
30
- needs: quality
31
- strategy:
32
- matrix:
33
- python-version: ["3.12"]
34
- fail-fast: false
35
- defaults:
36
- run:
37
- shell: bash
38
- steps:
39
- - name: Check out
40
- uses: actions/checkout@v4
41
-
42
- - name: Set up the environment
43
- uses: ./.github/actions/setup-poetry-env
44
- with:
45
- python-version: ${{ matrix.python-version }}
46
-
47
- - name: Run tests
48
- run: poetry run pytest tests --cov --cov-config=pyproject.toml --cov-report=xml
49
-
50
- - name: Check typing
51
- run: poetry run mypy
52
-
53
- - name: Upload coverage reports to Codecov with GitHub Action on Python 3.12
54
- uses: codecov/codecov-action@v5
55
- with:
56
- token: ${{ secrets.CODECOV_TOKEN }}
57
- if: ${{ matrix.python-version == '3.12' }}
58
-
59
  build:
60
  runs-on: ubuntu-latest
61
- needs: tests
62
  strategy:
63
  matrix:
64
  python-version: ["3.12"]
 
25
  - name: Run checks
26
  run: make lint
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  build:
29
  runs-on: ubuntu-latest
30
+ needs: [quality]
31
  strategy:
32
  matrix:
33
  python-version: ["3.12"]
.github/workflows/version-bump-and-release.yml CHANGED
@@ -15,7 +15,6 @@ on:
15
  permissions:
16
  contents: write
17
 
18
-
19
  jobs:
20
  bump-version:
21
  runs-on: ubuntu-latest
@@ -59,4 +58,3 @@ jobs:
59
  token: ${{ secrets.GITHUB_TOKEN }}
60
  event-type: package-release
61
  client-payload: '{"version": "${{ steps.bump.outputs.NEW_TAG }}"}'
62
-
 
15
  permissions:
16
  contents: write
17
 
 
18
  jobs:
19
  bump-version:
20
  runs-on: ubuntu-latest
 
58
  token: ${{ secrets.GITHUB_TOKEN }}
59
  event-type: package-release
60
  client-payload: '{"version": "${{ steps.bump.outputs.NEW_TAG }}"}'
 
.pre-commit-config.yaml CHANGED
@@ -22,16 +22,16 @@ repos:
22
  hooks:
23
  - id: prettier
24
 
25
- - repo: https://github.com/psf/black
26
- rev: 23.9.0
27
- hooks:
28
- - id: black
29
 
30
- - repo: https://github.com/timothycrosley/isort
31
- rev: 5.12.0
32
- hooks:
33
- - id: isort
34
- args: [--settings-path=pyproject.toml]
35
 
36
  - repo: https://github.com/PyCQA/pydocstyle
37
  rev: 6.3.0
 
22
  hooks:
23
  - id: prettier
24
 
25
+ # - repo: https://github.com/psf/black
26
+ # rev: 23.9.0
27
+ # hooks:
28
+ # - id: black
29
 
30
+ # - repo: https://github.com/timothycrosley/isort
31
+ # rev: 5.12.0
32
+ # hooks:
33
+ # - id: isort
34
+ # args: [--settings-path=pyproject.toml]
35
 
36
  - repo: https://github.com/PyCQA/pydocstyle
37
  rev: 6.3.0
.vscode/launch.json CHANGED
@@ -5,7 +5,8 @@
5
  "name": "Python Debugger: jio_savan_music_downloader",
6
  "type": "debugpy",
7
  "request": "launch",
8
- "program": "${workspaceFolder}/jio_savan_music_downloader/main.py",
 
9
  "console": "integratedTerminal",
10
  "justMyCode": true,
11
  "jinja": true
 
5
  "name": "Python Debugger: jio_savan_music_downloader",
6
  "type": "debugpy",
7
  "request": "launch",
8
+ "program": "${workspaceFolder}/jio_savan_music_downloader/app.py",
9
+ "args": ["run"],
10
  "console": "integratedTerminal",
11
  "justMyCode": true,
12
  "jinja": true
Dockerfile CHANGED
@@ -1,6 +1,8 @@
1
  # Builder Stage
2
  FROM python:3.12-slim AS builder
3
 
 
 
4
  # Set environment variables for Poetry
5
  ENV POETRY_VERSION=1.6.1 \
6
  POETRY_HOME="/opt/poetry" \
@@ -15,14 +17,14 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl \
15
  WORKDIR /app
16
 
17
  # Copy only the dependency files first to leverage Docker caching
18
- COPY pyproject.toml poetry.lock /app/
19
 
20
  # Install dependencies (only for building the wheel)
21
  RUN poetry config virtualenvs.create false \
22
  && poetry install --no-root --only main
23
 
24
  # Copy the rest of the application code
25
- COPY . /app
26
 
27
  # Build the wheel file
28
  RUN poetry build -f wheel
@@ -30,24 +32,38 @@ RUN poetry build -f wheel
30
  # Runtime Stage
31
  FROM python:3.12-slim AS runtime
32
 
 
 
33
  # Set environment variables
34
  ENV PYTHONUNBUFFERED=1
35
 
36
- # Install runtime dependencies
37
- RUN apt-get update && apt-get install -y --no-install-recommends libpq-dev \
38
- && apt-get clean && rm -rf /var/lib/apt/lists/*
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  # Set working directory
41
  WORKDIR /app
42
 
43
  # Copy the built wheel file from the builder stage
44
- COPY --from=builder /app/dist/*.whl /app/
 
45
 
46
  # Install the wheel file
47
  RUN pip install --no-cache-dir /app/*.whl
48
 
49
  # Expose application port
50
- EXPOSE 80
51
 
52
  # Command to run the application
53
  CMD ["jio_savan_music_downloader"]
 
1
  # Builder Stage
2
  FROM python:3.12-slim AS builder
3
 
4
+ RUN useradd -m -u 1000 user
5
+
6
  # Set environment variables for Poetry
7
  ENV POETRY_VERSION=1.6.1 \
8
  POETRY_HOME="/opt/poetry" \
 
17
  WORKDIR /app
18
 
19
  # Copy only the dependency files first to leverage Docker caching
20
+ COPY --chown=user pyproject.toml poetry.lock /app/
21
 
22
  # Install dependencies (only for building the wheel)
23
  RUN poetry config virtualenvs.create false \
24
  && poetry install --no-root --only main
25
 
26
  # Copy the rest of the application code
27
+ COPY --chown=user . /app
28
 
29
  # Build the wheel file
30
  RUN poetry build -f wheel
 
32
  # Runtime Stage
33
  FROM python:3.12-slim AS runtime
34
 
35
+ RUN useradd -m -u 1000 user
36
+
37
  # Set environment variables
38
  ENV PYTHONUNBUFFERED=1
39
 
40
+ # Install runtime dependencies for Chromium and Selenium
41
+ RUN apt-get update && apt-get install -y \
42
+ chromium \
43
+ chromium-driver \
44
+ gnupg \
45
+ libgconf-2-4 \
46
+ libnss3 \
47
+ unzip \
48
+ wget \
49
+ && rm -rf /var/lib/apt/lists/*
50
+
51
+ # Set environment variables for Selenium to use Chromium
52
+ ENV CHROME_BIN=/usr/bin/chromium
53
+ ENV CHROMEDRIVER_PATH=/usr/bin/chromedriver
54
 
55
  # Set working directory
56
  WORKDIR /app
57
 
58
  # Copy the built wheel file from the builder stage
59
+ COPY --chown=user --from=builder /app/dist/*.whl /app/
60
+
61
 
62
  # Install the wheel file
63
  RUN pip install --no-cache-dir /app/*.whl
64
 
65
  # Expose application port
66
+ EXPOSE 7860
67
 
68
  # Command to run the application
69
  CMD ["jio_savan_music_downloader"]
LICENSE CHANGED
@@ -19,4 +19,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
  SOFTWARE.
22
-
 
19
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
  SOFTWARE.
 
Makefile CHANGED
@@ -121,6 +121,10 @@ lint: ## Run code quality tools
121
  @echo "🚀 Linting code with pre-commit"
122
  @poetry run pre-commit run -a
123
  @echo "🚀 Static type checking with mypy"
 
 
 
 
124
  @poetry run mypy
125
  @echo "🚀 Checking for obsolete dependencies with deptry"
126
  @poetry run deptry .
@@ -175,7 +179,7 @@ update: ## Update project dependencies
175
  .PHONY: run
176
  run: ## Run the project's main application
177
  @echo "🚀 Running the project"
178
- @poetry run python $(PROJECT_SLUG)/main.py
179
 
180
  .PHONY: docs-test
181
  docs-test: ## Test if documentation can be built without warnings or errors
 
121
  @echo "🚀 Linting code with pre-commit"
122
  @poetry run pre-commit run -a
123
  @echo "🚀 Static type checking with mypy"
124
+ # @echo "🚀 Sorting imports with isort"
125
+ # @poetry run isort jio_savan_music_downloader/
126
+ # @echo "🚀 Linting code with Ruff"
127
+ # @poetry run ruff format jio_savan_music_downloader/
128
  @poetry run mypy
129
  @echo "🚀 Checking for obsolete dependencies with deptry"
130
  @poetry run deptry .
 
179
  .PHONY: run
180
  run: ## Run the project's main application
181
  @echo "🚀 Running the project"
182
+ @poetry run streamlit run $(PROJECT_SLUG)/app.py
183
 
184
  .PHONY: docs-test
185
  docs-test: ## Test if documentation can be built without warnings or errors
README.md CHANGED
@@ -1,9 +1,19 @@
 
 
 
 
 
 
 
 
 
 
1
  # jio-savan-music-downloader
2
 
3
  This app will download Jio-Savan music.
4
 
5
- - **Github repository**: <https://github.com/DeepakPant93/jio-savan-music-downloader/>
6
- - **Documentation** <https://DeepakPant93.github.io/jio-savan-music-downloader/>
7
 
8
  ## Getting started with your project
9
 
 
1
+ ---
2
+ title: Free Music Downloader
3
+ emoji: 🎵
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: docker
7
+ app_port: 7860
8
+ short_description: AI service for searching and downloading free music.
9
+ ---
10
+
11
  # jio-savan-music-downloader
12
 
13
  This app will download Jio-Savan music.
14
 
15
+ - **Github repository**: <https://github.com/DeepakPant93/jio-savan-music-downloader/>
16
+ - **Documentation** <https://DeepakPant93.github.io/jio-savan-music-downloader/>
17
 
18
  ## Getting started with your project
19
 
artifacts/processed/.gitkeep DELETED
File without changes
docker-compose.yml CHANGED
@@ -1,22 +1,16 @@
1
  version: "3.9"
2
 
3
  services:
4
- jio_savan_music_downloader:
5
- build:
6
- context: .
7
- dockerfile: Dockerfile
8
- container_name: jio_savan_music_downloader
9
- ports:
10
- - "80:80"
11
- volumes:
12
- - .:/app
13
- environment:
14
- - PYTHONUNBUFFERED=1
15
- healthcheck:
16
- test: ["CMD", "curl", "-f", "http://localhost:80/health" ]
17
- interval: 30s
18
- timeout: 10s
19
- retries: 3
20
- start_period: 10s
21
- command: >
22
- jio_savan_music_downloader
 
1
  version: "3.9"
2
 
3
  services:
4
+ jio_savan_music_downloader:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ container_name: jio_savan_music_downloader
9
+ ports:
10
+ - "7860:7860"
11
+ volumes:
12
+ - .:/app
13
+ environment:
14
+ - PYTHONUNBUFFERED=1
15
+ command: >
16
+ jio_savan_music_downloader
 
 
 
 
 
 
docs/index.md CHANGED
@@ -44,6 +44,7 @@ This repository contains a sample Data Science application built with FastAPI, d
44
  git clone https://github.com/DeepakPant93/jio-savan-music-downloader
45
  cd jio-savan-music-downloader
46
  ```
 
47
  1. Initialize the repository if it's your first time:
48
 
49
  ```bash
@@ -51,13 +52,13 @@ This repository contains a sample Data Science application built with FastAPI, d
51
  make init-repo
52
  ```
53
 
54
- 2. Install dependencies using Poetry:
55
 
56
  ```bash
57
  make bake-env
58
  ```
59
 
60
- 3. Run the FastAPI server:
61
 
62
  ```bash
63
  make run
 
44
  git clone https://github.com/DeepakPant93/jio-savan-music-downloader
45
  cd jio-savan-music-downloader
46
  ```
47
+
48
  1. Initialize the repository if it's your first time:
49
 
50
  ```bash
 
52
  make init-repo
53
  ```
54
 
55
+ 1. Install dependencies using Poetry:
56
 
57
  ```bash
58
  make bake-env
59
  ```
60
 
61
+ 1. Run the Streamlit app:
62
 
63
  ```bash
64
  make run
docs/modules.md CHANGED
@@ -1,33 +1,5 @@
1
  ::: jio_savan_music_downloader
2
 
3
- ## Configuration
4
-
5
- ::: jio_savan_music_downloader.config
6
-
7
- ## Core
8
-
9
- ::: jio_savan_music_downloader.core
10
-
11
- ## Constants
12
-
13
- ::: jio_savan_music_downloader.constants
14
-
15
- ## Logger
16
-
17
- ::: jio_savan_music_downloader.logger
18
-
19
- ## Utils
20
-
21
- ::: jio_savan_music_downloader.utils
22
-
23
- ## Exceptions
24
-
25
- ::: jio_savan_music_downloader.exception
26
-
27
- ## Entities
28
-
29
- ::: jio_savan_music_downloader.entity
30
-
31
  ## Models
32
 
33
  ::: jio_savan_music_downloader.models
@@ -35,7 +7,3 @@
35
  ## Services
36
 
37
  ::: jio_savan_music_downloader.services
38
-
39
- ## APIs
40
-
41
- ::: jio_savan_music_downloader.api
 
1
  ::: jio_savan_music_downloader
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ## Models
4
 
5
  ::: jio_savan_music_downloader.models
 
7
  ## Services
8
 
9
  ::: jio_savan_music_downloader.services
 
 
 
 
jio_savan_music_downloader/__main__.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # __main__.py
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ import streamlit.web.cli as stcli
6
+
7
+
8
+ def main() -> None:
9
+ """
10
+ Sets up the environment to run a Streamlit application by modifying
11
+ system arguments and initiating the Streamlit CLI.
12
+
13
+ This function determines the current directory of the script, constructs
14
+ the path to the Streamlit app, updates the system arguments to run the
15
+ app, and then executes the Streamlit command-line interface to start the
16
+ application.
17
+
18
+ Exits the program when the Streamlit application exits, passing the
19
+ appropriate exit code.
20
+ """
21
+
22
+ current_dir = Path(__file__).parent
23
+ streamlit_app_path = current_dir / "app.py"
24
+
25
+ sys.argv = ["streamlit", "run", str(streamlit_app_path), "--server.port", "7860"]
26
+ sys.exit(stcli.main())
27
+
28
+
29
+ if __name__ == "__main__":
30
+ main()
jio_savan_music_downloader/api/__init__.py DELETED
@@ -1,15 +0,0 @@
1
- """
2
-
3
- This module contains API endpoints for a Retrieval-Augmented Generation (RAG) application.
4
-
5
- Endpoints:
6
- - POST /upload-docs: This endpoint allows users to upload and manage documents for processing.
7
- - POST /ask: This endpoint is used for querying the system and receiving context-aware answers based on uploaded documents.
8
-
9
- Features:
10
- - **User-Friendly API**: Simplifies user interaction with the application's core functionalities.
11
- - **Seamless Integration**: Connects document management, vector database, and LLM workflows efficiently.
12
-
13
- Purpose:
14
- - Provides the interface for users to interact with the application's core RAG functionalities.
15
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
jio_savan_music_downloader/api/endpoint.py DELETED
@@ -1,31 +0,0 @@
1
- from fastapi import APIRouter
2
-
3
- router = APIRouter()
4
-
5
-
6
- @router.post("/upload-docs")
7
- async def upload_docs() -> dict:
8
- """
9
- Endpoint to upload documentation. This function encapsulates
10
- the logic to upload the documentation and returns a success
11
- message upon completion.
12
-
13
- Returns:
14
- dict: A dictionary containing a success message.
15
- """
16
-
17
- return {"message": "Training Model Successful"}
18
-
19
-
20
- @router.post("/ask")
21
- async def ask() -> dict:
22
- """
23
- Endpoint to handle prediction requests. This function processes
24
- incoming requests for predictions and returns a success message
25
- upon completion.
26
-
27
- Returns:
28
- dict: A dictionary containing a success message.
29
- """
30
-
31
- return {"message": "Prediction Successful"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
jio_savan_music_downloader/app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+
3
+ import streamlit as st
4
+
5
+ from jio_savan_music_downloader.services.app_service import search_music
6
+
7
+ warnings.filterwarnings("ignore", category=SyntaxWarning, module="pysbd")
8
+
9
+
10
+ # Set page config
11
+ st.set_page_config(page_title="Music Search", page_icon="🎵", layout="wide")
12
+
13
+ # App title
14
+ st.title("🎵 Music Search Results")
15
+
16
+ search_query = st.sidebar.text_input("Enter song name or artist")
17
+
18
+ if search_query:
19
+ # Show loading spinner
20
+ with st.spinner("Searching for music..."):
21
+ music_data = search_music(search_query)
22
+
23
+ if music_data is None or len(music_data) == 0:
24
+ st.warning("No music found. Please try again.")
25
+
26
+ for item in music_data:
27
+ try:
28
+ song_id = item["song_info"]["song_url"].split("/")[-1] # Get unique ID from URL
29
+ song_title = item["song_info"]["title"].split(" - ")[0]
30
+ musicians = item["song_info"]["musician"]
31
+ artists = ", ".join(musicians[:2])
32
+ release_date = item["song_info"]["release_date"]
33
+
34
+ # Display song information in a row
35
+ with st.container():
36
+ # Create columns
37
+ col1, col2, col3, col4 = st.columns([1, 2, 2, 2])
38
+
39
+ # Column 1: Image
40
+ with col1:
41
+ st.image(item["album_image_url"], width=100)
42
+
43
+ # Column 2: Title and Artists
44
+ with col2:
45
+ st.markdown(f"**{song_title}**")
46
+ st.markdown(f"*{artists} | {release_date}*")
47
+
48
+ # Column 4: Audio Player
49
+ with col4:
50
+ st.audio(item["song_info"]["downloadable_url"])
51
+ except Exception as e:
52
+ print(f"An error occurred: {e!s}")
53
+ continue
jio_savan_music_downloader/config/__init__.py DELETED
@@ -1,13 +0,0 @@
1
- """
2
-
3
- This module manages the configuration settings for the application.
4
-
5
- Purpose:
6
- - Centralizes configuration management for environment variables, application settings, and dependencies.
7
- - Ensures consistent and secure access to critical configurations.
8
-
9
- Features:
10
- - Loads environment variables and provides access to them.
11
- - Validates and parses configuration settings using Pydantic models.
12
- - Supports dynamic updates for configuration changes during runtime (if applicable).
13
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
jio_savan_music_downloader/config/agents.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ music_researcher:
2
+ role: >
3
+ Senior Music Researcher at {website}
4
+ goal: >
5
+ Discover the latest music from {website} on {topic}
6
+ backstory: >
7
+ You're a seasoned music researcher with a passion for discovering new
8
+ music from {website}. Known for your ability to find the most relevant
9
+ information and present it in a clear and concise manner.
jio_savan_music_downloader/config/config.py DELETED
File without changes
jio_savan_music_downloader/config/tasks.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ music_research_task:
2
+ description: >
3
+ Find the latest music from the internet for {website} on {topic}. Make sure you find the most
4
+ relevant information and present it in a clear and concise manner.
5
+ expected_output: >
6
+ A list of at least 5 of the latest music links in JSON with (title, link) format from the internet on {topic}. Search for the music on the internet and get the most relevant {website} link.
7
+ agent: music_researcher
jio_savan_music_downloader/constants/__init__.py DELETED
@@ -1,18 +0,0 @@
1
- """
2
-
3
- This package defines global constants used throughout the project. Constants
4
- help in maintaining consistency and avoiding magic numbers or strings in the codebase.
5
-
6
- Usage:
7
- Import the required constants as needed:
8
-
9
- Example:
10
- ```python
11
- from constants import APP_NAME, ENVIRONMENT
12
- from constants import STATUS_OK, STATUS_BAD_REQUEST
13
- ```
14
-
15
- Purpose:
16
- - Centralizes constant values for maintainability and reusability.
17
- - Reduces hard-coded values in the project.
18
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
jio_savan_music_downloader/core/__init__.py DELETED
@@ -1,12 +0,0 @@
1
- """
2
-
3
- This module serves as the core of the application, integrating essential services for the Retrieval-Augmented Generation (RAG) workflow.
4
-
5
- Modules:
6
- EmbeddingService: Handles document embedding generation and retrieval for context-aware processing.
7
- LLMService: Interfaces with large language models (LLMs) to generate answers to user queries.
8
- Processor: Orchestrates workflows between the embedding and LLM services, ensuring seamless data flow and processing.
9
-
10
- Purpose:
11
- - Provides the foundational logic and services required to power the application's RAG functionalities.
12
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
jio_savan_music_downloader/core/embedding_service.py DELETED
File without changes
jio_savan_music_downloader/core/llm_service.py DELETED
File without changes
jio_savan_music_downloader/core/processor.py DELETED
File without changes
jio_savan_music_downloader/crew.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crewai import Agent, Crew, Process, Task
2
+ from crewai.project import CrewBase, agent, crew, task
3
+
4
+ from jio_savan_music_downloader.models.response_models import MusicDetails
5
+ from jio_savan_music_downloader.tools.custom_tool import search_tool
6
+
7
+
8
+ @CrewBase
9
+ class JioSavanMusicDownloaderAgent:
10
+ """JioSavanMusicDownloaderAgent crew"""
11
+
12
+ agents_config = "config/agents.yaml"
13
+ tasks_config = "config/tasks.yaml"
14
+
15
+ @agent
16
+ def music_researcher(self) -> Agent:
17
+ """
18
+ Creates a music researcher agent.
19
+
20
+ This agent is responsible for searching for the specified music on JioSaavn
21
+ and returning the results in a structured format.
22
+
23
+ :return: An instance of the Agent class
24
+ """
25
+ return Agent(config=self.agents_config["music_researcher"], verbose=True)
26
+
27
+ @task
28
+ def music_research_task(self) -> Task:
29
+ """
30
+ Creates the music research task.
31
+
32
+ This task is responsible for searching for the specified music on JioSaavn
33
+ and returning the results in a structured format.
34
+
35
+ :return: An instance of the Task class
36
+ """
37
+
38
+ return Task(
39
+ config=self.tasks_config["music_research_task"],
40
+ tools=[search_tool],
41
+ output_json=MusicDetails,
42
+ )
43
+
44
+ @crew
45
+ def crew(self) -> Crew:
46
+ """Creates the JioSavanMusicDownloaderAgent crew"""
47
+
48
+ return Crew(
49
+ agents=self.agents, # Automatically created by the @agent decorator
50
+ tasks=self.tasks, # Automatically created by the @task decorator
51
+ process=Process.sequential,
52
+ verbose=False,
53
+ )
jio_savan_music_downloader/entity/__init__.py DELETED
@@ -1,16 +0,0 @@
1
- """
2
-
3
- This module defines the database entities used across the application for data storage and retrieval.
4
-
5
- Purpose:
6
- - Provides structured definitions for database models.
7
- - Facilitates interaction with the database by mapping application data to database schemas.
8
-
9
- Components:
10
- - DB Entities: Defines entities that represent database tables or collections, including fields and relationships.
11
-
12
- Features:
13
- - Centralized entity definitions for consistent database operations.
14
- - Supports validation and serialization of database records.
15
- - Designed to integrate seamlessly with the database layer, ensuring reliable data handling.
16
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
jio_savan_music_downloader/exception/__init__.py DELETED
@@ -1,61 +0,0 @@
1
- """
2
-
3
- This module defines custom exception classes and error-handling utilities tailored
4
- to the needs of a data science pipeline. It helps standardize error reporting, improve
5
- debugging, and provide meaningful feedback during model training, data preprocessing,
6
- and prediction processes.
7
-
8
- Classes:
9
- DataValidationError: Raised when input data fails validation checks.
10
- ModelTrainingError: Raised during errors in the model training phase, such as convergence issues or invalid configurations.
11
- PredictionError: Raised when the prediction pipeline encounters issues, such as missing features or incompatible input formats.
12
- PipelineExecutionError: Raised for generic errors occurring during pipeline execution.
13
-
14
- Usage:
15
- Import and use the exceptions in various stages of the data science pipeline:
16
-
17
- Example:
18
- ```python
19
- from exception import DataValidationError, ModelTrainingError
20
-
21
- try:
22
- validate_data(input_data)
23
- except DataValidationError as e:
24
- logger.error(f"Data validation failed: {e}")
25
- raise
26
- ```
27
-
28
- Features:
29
- - Custom exceptions for specific pipeline stages, ensuring meaningful error reporting.
30
- - Enables targeted exception handling, reducing debugging time.
31
- - Provides a consistent structure for error messages across the project.
32
-
33
- Purpose:
34
- - To define project-specific exceptions for common error scenarios in the pipeline.
35
- - To improve the robustness and reliability of the pipeline by enabling clear error handling.
36
- - To make the debugging process more intuitive by raising descriptive errors.
37
-
38
- Examples:
39
- - **Data Validation**: Raise a `DataValidationError` if the input data schema is incorrect or missing required fields.
40
- - **Model Training**: Raise a `ModelTrainingError` if the model fails to converge due to invalid hyperparameters.
41
- - **Prediction**: Raise a `PredictionError` when incompatible input data is passed to the model.
42
-
43
- Additional Notes:
44
- - Use these exceptions in conjunction with logging to provide detailed error information.
45
- - Ensure that custom exceptions are raised with meaningful messages to assist in debugging and error resolution.
46
- """
47
-
48
- from fastapi import HTTPException
49
-
50
-
51
- class CustomException(HTTPException):
52
- """Custom exception class for handling errors in the data science pipeline."""
53
-
54
- def __init__(self, status_code: int, detail: str):
55
- """
56
- Custom exception for handling API errors.
57
-
58
- :param status_code: The HTTP status code to return.
59
- :param detail: A string describing the error in detail.
60
- """
61
- super().__init__(status_code=status_code, detail=detail)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
jio_savan_music_downloader/logger/__init__.py DELETED
@@ -1,61 +0,0 @@
1
- """
2
-
3
- This module provides centralized logging utilities for the data science pipeline.
4
- It standardizes logging practices, ensures consistency across components, and facilitates
5
- easy debugging and monitoring of the pipeline's execution, including data preprocessing,
6
- model training, evaluation, and predictions.
7
-
8
- Functions:
9
- setup_logging: Configures the logging system, including log format, level, and output destinations.
10
- get_logger: Returns a logger instance for a specific module or stage of the pipeline.
11
-
12
- Features:
13
- - Centralized logging configuration to maintain consistency.
14
- - Support for different log levels (INFO, DEBUG, WARNING, ERROR, CRITICAL).
15
- - Ability to write logs to files, console, or external monitoring systems.
16
- - Timestamped log entries for accurate tracking of events.
17
- - Integration with custom exception handling for detailed error reporting.
18
-
19
- Usage:
20
- Use this module to log messages in a standardized manner across the project:
21
-
22
- Example:
23
- ```python
24
- from src.logging import logger
25
-
26
- logger.info("Starting the model training process...")
27
- logger.error("An error occurred during data validation.")
28
- ```
29
-
30
- Purpose:
31
- - To provide a standardized mechanism for logging messages throughout the data science pipeline.
32
- - To assist in debugging by capturing detailed logs of each pipeline stage.
33
- - To enable seamless integration with monitoring and alerting systems.
34
-
35
- Best Practices:
36
- - Use appropriate log levels to categorize messages (e.g., DEBUG for detailed information, ERROR for issues).
37
- - Ensure logs include sufficient context, such as function names or input details, to aid debugging.
38
- - Regularly monitor log files for anomalies or errors in the pipeline.
39
-
40
- Additional Notes:
41
- - The `setup_logging` function can be configured to write logs to multiple destinations, such as files or cloud services.
42
- - The module can be extended to integrate with third-party monitoring tools like Elasticsearch, Splunk, or Datadog.
43
- """
44
-
45
- import logging
46
- import os
47
- import sys
48
-
49
- logging_str = "[%(asctime)s: %(levelname)s: %(module)s: %(message)s]"
50
- log_dir = "logs"
51
- log_filepath = os.path.join(log_dir,"jio-savan-music-downloader.log")
52
- os.makedirs(log_dir, exist_ok=True)
53
-
54
-
55
- logging.basicConfig(
56
- level=logging.INFO,
57
- format=logging_str,
58
- handlers=[logging.FileHandler(log_filepath), logging.StreamHandler(sys.stdout)],
59
- )
60
-
61
- logger = logging.getLogger("jio-savan-music-downloader-logger")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
jio_savan_music_downloader/main.py DELETED
@@ -1,92 +0,0 @@
1
- from fastapi import FastAPI, responses
2
- from fastapi.openapi.utils import get_openapi
3
- from fastapi_health import health
4
-
5
- from jio_savan_music_downloader.api.endpoint import router
6
-
7
- __version__ = "0.0.1"
8
-
9
- app = FastAPI(
10
- title="jio-savan-music-downloader APIs",
11
- description="This app will download Jio-Savan music.",
12
- version=__version__,
13
- docs_url="/docs",
14
- redoc_url="/redoc",
15
- )
16
-
17
-
18
- app = FastAPI()
19
-
20
- @app.get("/", include_in_schema=False)
21
- async def root() -> responses.RedirectResponse:
22
- """
23
- Redirects the root URL to the API documentation page.
24
-
25
- Returns:
26
- RedirectResponse: A response object that redirects the client to the "/docs" URL.
27
- """
28
-
29
- return responses.RedirectResponse("/docs")
30
-
31
-
32
- # Health Check
33
- async def health_check() -> dict:
34
- """
35
- Checks the health of the API.
36
-
37
- This endpoint checks the health of the API and returns a simple status
38
- message. It is intended to be used by load balancers or other monitoring
39
- systems to determine if the API is functional.
40
-
41
- Returns:
42
- dict: A dictionary containing the status of the API.
43
- """
44
- return {"status": "healthy"}
45
-
46
-
47
- # Include routers
48
- app.add_api_route(
49
- "/health",
50
- health([health_check]),
51
- tags=["Management"],
52
- description="Management APIs",
53
- )
54
- app.include_router(router, prefix="/api/v1", tags=["Operations"])
55
-
56
-
57
- def _custom_openapi() -> dict:
58
- if app.openapi_schema:
59
- return app.openapi_schema
60
- openapi_schema = get_openapi(
61
- title="jio-savan-music-downloader APIs",
62
- description="This app will download Jio-Savan music.",
63
- version=__version__,
64
- routes=app.routes,
65
- )
66
- app.openapi_schema = openapi_schema
67
- return app.openapi_schema
68
-
69
-
70
- app.openapi = _custom_openapi
71
-
72
-
73
- def main() -> None:
74
- """
75
- The main entry point of the application.
76
-
77
- This function starts the FastAPI server using Uvicorn. It serves the API
78
- on the specified host and port. The function is intended to be run
79
- directly when the script is executed.
80
-
81
- Notes:
82
- - The 'nosec B104' comment is used to suppress a security warning
83
- related to binding to all network interfaces.
84
- """
85
-
86
- import uvicorn
87
-
88
- uvicorn.run(app, host="0.0.0.0", port=80) # nosec B104
89
-
90
-
91
- if __name__ == "__main__":
92
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
jio_savan_music_downloader/models/request_models.py DELETED
File without changes
jio_savan_music_downloader/models/response_models.py CHANGED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
+
4
class MusicLink(BaseModel):
    """A single music search hit: a title paired with its link."""

    # Title of the hit (presumably the song or album name -- confirm against the task prompt).
    title: str
    # Link of the hit; app_service collects these values and hands them to the page scraper.
    link: str
10
+
11
+
12
class MusicDetails(BaseModel):
    """Structured output schema of the music research task: a list of music links."""

    # One entry per search hit.
    links: list[MusicLink]
jio_savan_music_downloader/services/app_service.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+
3
+ from jio_savan_music_downloader.crew import JioSavanMusicDownloaderAgent
4
+ from jio_savan_music_downloader.services.web_scarapper_service import scrape_pages
5
+
6
+ warnings.filterwarnings("ignore", category=SyntaxWarning, module="pysbd")
7
+
8
+
9
def search_music(query: str) -> list[dict]:
    """
    Search for music matching *query* and return scraped details for each hit.

    Progress is reported on stdout. Any exception raised along the way is
    printed rather than propagated, in which case whatever was collected so
    far (possibly an empty list) is returned.

    Parameters:
    query (str): Free-text music search query

    Returns:
    list of dict: Scraped details, one dictionary per song
    """
    songs: list[dict] = []
    try:
        # Step 1: let the crew find candidate links.
        print(f"Searching for music: {query}")
        hits = search_internet(query)
        print(f"Found {len(hits)} results")

        # Step 2: scrape every candidate page.
        print("Getting music details")
        songs = get_music_details(hits)
        print(f"Got details for {len(songs)} songs")
        print(f"Music details: {songs}")
        print("Done")
    except Exception as err:
        # Deliberate catch-all boundary: callers always get a list back.
        print(f"An error occurred: {err!s}")

    return songs
27
+
28
+
29
def search_internet(query: str) -> list[dict]:
    """
    Run the crew for *query* and return the links it reports.

    Parameters:
    query (str): Topic handed to the crew as the ``topic`` input

    Returns:
    list of dict: Value of the ``links`` key of the crew result, or an empty
    list when the key is missing or is not a list
    """
    crew_inputs = {"website": "https://www.jiosaavn.com", "topic": query}
    crew_output = JioSavanMusicDownloaderAgent().crew().kickoff(inputs=crew_inputs)
    found = crew_output.to_dict().get("links", [])
    if not isinstance(found, list):
        return []
    return found
35
+
36
+
37
def get_music_details(songs: list[dict]) -> list[dict]:
    """
    Scrape the page behind every song link and return the collected details.

    Entries that are not dictionaries, or that carry no non-empty string
    under ``"link"``, are skipped so that one malformed search hit does not
    abort the whole lookup.

    Parameters:
    songs (list of dict): Search hits, each expected to hold a ``"link"`` key

    Returns:
    list of dict: Scraped details for the pages that could be processed
    """
    links: list[str] = []
    for song in songs:
        link = song.get("link") if isinstance(song, dict) else None
        if isinstance(link, str) and link:
            links.append(link)
    return scrape_pages(links)
jio_savan_music_downloader/services/document_service.py DELETED
File without changes
jio_savan_music_downloader/services/vector_db_service.py DELETED
File without changes
jio_savan_music_downloader/services/web_scarapper_service.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from datetime import datetime
3
+ from typing import Any
4
+ from urllib.parse import urlparse
5
+
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
+ from selenium import webdriver
9
+ from selenium.common.exceptions import TimeoutException
10
+ from selenium.webdriver.chrome.service import Service
11
+ from selenium.webdriver.common.by import By
12
+ from selenium.webdriver.support import expected_conditions as EC
13
+ from selenium.webdriver.support.ui import WebDriverWait
14
+
15
+
16
def _setup_driver() -> webdriver.Chrome:
    """
    Create a headless Chrome WebDriver configured for container use.

    The driver binary at ``/usr/bin/chromedriver`` is tried first. If that
    fails for any reason, Chrome is started without an explicit service so
    that Selenium locates a driver on its own.

    Returns:
    webdriver.Chrome: A started driver; the caller is responsible for calling ``quit()``
    """
    options = webdriver.ChromeOptions()
    chrome_flags = (
        # Essential container arguments
        "--headless=new",
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        # JavaScript-specific configurations
        "--enable-javascript",
        "--disable-web-security",
        "--allow-running-insecure-content",
        # Performance optimizations
        "--window-size=1920,1080",
        "--disable-extensions",
        "--disable-setuid-sandbox",
        # Memory management
        "--disable-dev-tools",
        "--no-zygote",
        "--single-process",
    )
    for flag in chrome_flags:
        options.add_argument(flag)

    try:
        print("Setting up Chrome WebDriver")
        chrome_service = Service("/usr/bin/chromedriver")
        # Return this driver directly: creating a second one here would leave
        # the first browser process running with nothing left to quit it.
        return webdriver.Chrome(service=chrome_service, options=options)
    except Exception as e:
        # Fall back to letting Selenium resolve the driver binary itself.
        print(f"An error occurred: {e!s}")
        print("Falling back to direct path")
        return webdriver.Chrome(options=options)
55
+
56
+
57
+ def _get_downloadable_audio_link(url: str) -> str:
58
+ if not url:
59
+ return ""
60
+
61
+ # Extract the album ID and file ID from the URL
62
+ parsed_url = urlparse(url)
63
+ path_parts = parsed_url.path.split("/")
64
+ album_id = path_parts[-2]
65
+ file_id = path_parts[-1].split(".")[0]
66
+
67
+ # Construct the downloadable audio link
68
+ return f"https://aac.saavncdn.com/{album_id}/{file_id}.mp4"
69
+
70
+
71
+ def _extract_musician_name(url: str) -> str:
72
+ return url.split("/")[-2].replace("-songs", "").replace("-", " ").title()
73
+
74
+
75
def _meta_content(soup: BeautifulSoup, attrs: dict[str, str]) -> str:
    """Return the ``content`` of the first ``<meta>`` tag matching *attrs*, or an empty string."""
    tag = soup.find("meta", attrs)
    return tag.get("content", "") if tag is not None else ""


def _format_release_date(raw: str) -> str:
    """Reformat a ``YYYY-MM-DD`` date as ``Month DD, YYYY``; return any other value unchanged."""
    if not raw:
        return ""
    try:
        return datetime.strptime(raw, "%Y-%m-%d").strftime("%B %d, %Y")
    except ValueError:
        # Keep the raw value rather than discarding the whole page over a date format.
        return raw


def _parse_song_page(html_content: str) -> dict[str, Any]:
    """Extract album and song metadata from the HTML of a rendered page."""
    soup = BeautifulSoup(html_content, "html.parser")
    page_title = soup.title.text if soup.title else ""

    # The <audio>/<source> element may be absent; treat that as "no audio link".
    audio = soup.find("audio")
    source = audio.find("source") if audio is not None else None
    audio_src = source.get("src", "") if source is not None else ""

    lyrics_anchor = soup.find("a", title="Song Lyrics")
    lyrics_href = lyrics_anchor.get("href", "") if lyrics_anchor is not None else ""

    return {
        "album_title": page_title,
        "description": _meta_content(soup, {"name": "description"}),
        "album_description": _meta_content(soup, {"property": "og:description"}),
        "album_url": _meta_content(soup, {"property": "music:album"}),
        "album_image_url": _meta_content(soup, {"property": "twitter:image"}),
        "song_info": {
            "name": page_title,
            "title": _meta_content(soup, {"property": "twitter:title"}),
            "musician": [
                _extract_musician_name(musician["content"])
                for musician in soup.find_all("meta", {"property": "music:musician"})
                if musician.get("content")
            ],
            "release_date": _format_release_date(_meta_content(soup, {"property": "music:release_date"})),
            "song_url": _meta_content(soup, {"property": "twitter:url"}),
            "description": _meta_content(soup, {"property": "twitter:description"}),
            "downloadable_url": _get_downloadable_audio_link(audio_src),
            "song_lyrics_url": "https://www.jiosaavn.com" + lyrics_href if lyrics_href else "",
        },
    }


def scrape_dynamic_page(url: str, wait_time: int = 5) -> dict[str, Any]:
    """
    Scrape a webpage including content loaded by JavaScript

    The page is opened in a fresh Chrome driver, a button on the page is
    clicked when it is interactable, and the resulting HTML is parsed for
    album and song metadata. The driver is always shut down afterwards.

    Parameters:
    url (str): The URL to scrape
    wait_time (int): Maximum time to wait for the button to appear

    Returns:
    dict: Dictionary containing various elements from the page, or an empty
    dictionary when the wait times out or any other error occurs
    """
    driver = _setup_driver()

    try:
        driver.get(url)

        # Wait for the button to be present.
        # NOTE(review): presumably this is the play button, whose click makes the
        # page add its <audio> element -- confirm against the live site.
        button = WebDriverWait(driver, wait_time).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'a.c-btn.c-btn--primary[data-btn-icon="q"]'))
        )

        is_displayed = button.is_displayed()
        is_enabled = button.is_enabled()
        print(f"Button displayed: {is_displayed}, Button enabled: {is_enabled}")

        if is_displayed and is_enabled:
            driver.execute_script("arguments[0].scrollIntoView(true);", button)
            try:
                button.click()
            except Exception as e:
                # Native click failed; retry with a JavaScript-dispatched click.
                print(f"An error occurred: {e!s}")
                driver.execute_script("arguments[0].click();", button)
        else:
            print("Button is not interactable!")

        # Wait a moment for any JavaScript updates
        time.sleep(5)

        return _parse_song_page(driver.page_source)
    except TimeoutException:
        print(f"Timeout waiting for page to load: {url}")
        return {}
    except Exception as e:
        print(f"An error occurred: {e!s}")
        return {}
    finally:
        driver.quit()
175
+
176
+
177
def scrape_pages(urls: list[str], wait_time: int = 5) -> list[dict]:
    """
    Scrape multiple webpages and return a list of elements

    Pages are scraped one after another; pages that yield no details are
    left out of the result.

    Parameters:
    urls (list of str): List of URLs to scrape
    wait_time (int): Maximum time to wait for dynamic content to load, passed
    on to ``scrape_dynamic_page`` for every URL

    Returns:
    list of dict: List of dictionaries containing various elements from each page
    """
    results = []
    for url in urls:
        details = scrape_dynamic_page(url, wait_time)
        if details:
            results.append(details)
    return results
194
+
195
+
196
def download_file(url: str) -> None:
    """
    Download a file from a URL and save it to a local file

    The file is stored as ``downloads/<name>.mp4``, where ``<name>`` is
    derived from the URL path. The ``downloads`` directory is created when
    it does not exist yet. Request failures and non-200 responses are
    reported on stdout instead of being raised.

    Parameters:
    url (str): URL of the file to be downloaded

    Returns:
    None
    """
    from pathlib import Path  # local import: only needed to create the target directory

    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=10) as response:
            if response.status_code != 200:
                print(f"Failed to download file. HTTP Status Code: {response.status_code}")
                return

            # Drop an existing ".mp4" before appending it, to avoid "name.mp4.mp4".
            stem = _get_filename_name(url).removesuffix(".mp4")
            filename = f"downloads/{stem}.mp4"
            Path(filename).parent.mkdir(parents=True, exist_ok=True)

            with open(filename, "wb") as file:
                # Write the content of the response to the file in chunks
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
            print(f"File downloaded successfully as '{filename}'")
    except requests.exceptions.Timeout:
        print(f"Request to {url} timed out.")
    except requests.exceptions.RequestException as e:
        print(f"Request to {url} failed: {e}")
226
+
227
+
228
+ def _get_filename_name(url: str) -> str:
229
+ parsed_url = urlparse(url)
230
+ path_parts = parsed_url.path.split("/")
231
+ return path_parts[2]
artifacts/documents/.gitkeep → jio_savan_music_downloader/tools/__init__.py RENAMED
File without changes
jio_savan_music_downloader/tools/custom_tool.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from crewai_tools import FileWriterTool, ScrapeWebsiteTool, SerperDevTool
2
+
3
# Shared tool instances, created once at import time.

# Website scraping tool bound to the JioSaavn home page.
jio_savan_scapper_tool = ScrapeWebsiteTool(website_url="https://www.jiosaavn.com")
# File writer tool with default settings.
file_writer_tool = FileWriterTool()
# Search tool handed to the music research task.
search_tool = SerperDevTool(
    country="in",  # Set to 'in' for India
    locale="en",  # Set locale to English
    n_results=5,  # You can adjust the number of results as needed
)
jio_savan_music_downloader/utils/__init__.py DELETED
@@ -1,15 +0,0 @@
1
- """
2
-
3
- The `utils` module provides various utility functions for file I/O, data encoding/decoding, and directory management.
4
-
5
- Functions:
6
- read_yaml: Reads a YAML file and returns its contents as a dictionary.
7
- create_directories: Creates directories if they do not exist.
8
- save_json: Saves data to a JSON file.
9
- load_json: Loads JSON data from a file.
10
- save_bin: Saves binary data to a file.
11
- load_bin: Loads binary data from a file.
12
- get_size: Returns the size of a file or directory in bytes.
13
- decode_image: Decodes an image from a base64 string.
14
- encode_image_into_base64: Encodes an image into a base64 string.
15
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks/trails.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -11,21 +11,16 @@ packages = [
11
  ]
12
 
13
  [tool.poetry.scripts]
14
- jio_savan_music_downloader = "jio_savan_music_downloader.main:main"
15
 
16
  [tool.poetry.dependencies]
17
- python = ">=3.11,<4.0"
18
- fastapi = "~0.100.0" # Latest compatible version
19
- uvicorn = "^0.23.0" # For running FastAPI apps
20
- fastapi-health = "^0.4.0" # Health check for FastAPI
21
- #pyyaml = "^6.0.2"
22
- #python-box = "^7.2.0"
23
- #ensure = "^1.0.4"
24
- #joblib = "^1.3.2" # Parallel processing and caching
25
- #python-dotenv = "^1.0.0" # Manage environment variables
26
- #PyPDF2 = "^3.0.0" # For PDF manipulation
27
- #pydantic = "^2.0.0" # Data validation and settings management
28
-
29
 
30
 
31
  [tool.poetry.group.dev.dependencies]
@@ -43,7 +38,8 @@ twine = "^4.0.0" # Publish packages
43
  bandit = "^1.8.0" # Security check
44
  pylint = "^3.0.0" # Powerful linter
45
  pydocstyle = "^6.3.0" # Enforce PEP 257 docstring conventions
46
- isort = "^5.12.0" # Sort imports
 
47
 
48
  [tool.poetry.group.docs.dependencies]
49
  mkdocs = "^1.5.0" # Documentation site generator
@@ -63,20 +59,6 @@ watchdog = "^3.0.0" # File monitoring
63
  mkdocstrings = {extras = ["python"], version = "^0.27.0"} # Auto-generate documentation from docstrings
64
  mkdocs-minify-plugin = "^0.8.0" # Minify HTML
65
 
66
- [tool.poetry.group.test.dependencies]
67
- pytest-mock = "^3.11.0" # Mocking in tests
68
- factory-boy = "^3.3.1" # Test data generation
69
- pytest-asyncio = "^0.21.0" # Async testing support
70
- pytest-xdist = "^3.3.1" # Parallel test execution
71
- freezegun = "^1.2.0" # Mock datetime
72
- pytest = "^7.2.0" # Testing framework
73
- allure-pytest = "^2.13.0" # Reporting for pytest
74
- pytest-sugar = "^0.9.7" # Better test output
75
- pytest-cov = "^4.0.0" # Test coverage reports
76
- httpx = "^0.24.0"
77
- pytest-runner = "^6.0.0" # Running tests via `python setup.py test`
78
-
79
-
80
  [build-system]
81
  requires = ["poetry-core>=1.0.0"]
82
  build-backend = "poetry.core.masonry.api"
@@ -101,7 +83,10 @@ module = [
101
  "yaml.*",
102
  "ensure.*",
103
  "fastapi_health.*",
104
- "jio_savan_music_downloader.main"
 
 
 
105
  ]
106
  ignore_missing_imports = true
107
  ignore_errors = true
@@ -178,11 +163,7 @@ source = ["jio_savan_music_downloader"]
178
  # concurrency = ["thread"]
179
  omit = [
180
  "**/__init__.py", # Exclude all init files
181
- "jio_savan_music_downloader/main.py", # Exclude main.py file
182
- "jio_savan_music_downloader/constants/*", # Exclude all files in a constants folder
183
- "jio_savan_music_downloader/exception/*", # Exclude all files in a exception folder
184
- "jio_savan_music_downloader/logger/*", # Exclude all files in a logger folder
185
- "jio_savan_music_downloader/entity/*", # Exclude all files in entity folder
186
  "jio_savan_music_downloader/config/*", # Exclude all files in config folder
187
  "jio_savan_music_downloader/models/*", # Exclude all files in model folder
188
  ]
@@ -200,13 +181,16 @@ glob = "pyproject.toml"
200
  search = 'version = "{current_version}"'
201
  replace = 'version = "{new_version}"'
202
 
203
- [[tool.bumpversion.files]]
204
- glob = "jio_savan_music_downloader/main.py"
205
- search = '__version__ = "{current_version}"'
206
- replace = '__version__ = "{new_version}"'
207
-
208
  [tool.deptry]
209
  exclude = ["research","artifacts", "notebooks", "tests", "docs", ".venv", "venv", "__pycache__", ".ruff_cache", ".pytest_cache", ".mypy_cache", ".coverage", ".git", "build", "dist", ".github", "site", "config"]
 
210
 
211
  [tool.pydocstyle]
212
- select = ["D101", "D102"]
 
 
 
 
 
 
 
 
11
  ]
12
 
13
  [tool.poetry.scripts]
14
+ jio_savan_music_downloader = "jio_savan_music_downloader.__main__:main"
15
 
16
  [tool.poetry.dependencies]
17
+ python = ">=3.11,<3.13"
18
+ crewai = {version = ">=0.86.0,<1.0.0", extras = ["tools"]}
19
+ streamlit = "^1.41.1"
20
+ selenium = "^4.27.1"
21
+ # chromedriver_autoinstaller = "^0.6.4"
22
+ # webdriver-manager = "*"
23
+ bs4 = "*"
 
 
 
 
 
24
 
25
 
26
  [tool.poetry.group.dev.dependencies]
 
38
  bandit = "^1.8.0" # Security check
39
  pylint = "^3.0.0" # Powerful linter
40
  pydocstyle = "^6.3.0" # Enforce PEP 257 docstring conventions
41
+ # isort = "^5.12.0" # Sort imports
42
+ # ruff = "^0.8.6" # Linting tool
43
 
44
  [tool.poetry.group.docs.dependencies]
45
  mkdocs = "^1.5.0" # Documentation site generator
 
59
  mkdocstrings = {extras = ["python"], version = "^0.27.0"} # Auto-generate documentation from docstrings
60
  mkdocs-minify-plugin = "^0.8.0" # Minify HTML
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  [build-system]
63
  requires = ["poetry-core>=1.0.0"]
64
  build-backend = "poetry.core.masonry.api"
 
83
  "yaml.*",
84
  "ensure.*",
85
  "fastapi_health.*",
86
+ "crewai.*",
87
+ "crewai_tools.*",
88
+ "bs4.*",
89
+ "jio_savan_music_downloader.crew"
90
  ]
91
  ignore_missing_imports = true
92
  ignore_errors = true
 
163
  # concurrency = ["thread"]
164
  omit = [
165
  "**/__init__.py", # Exclude all init files
166
+ "jio_savan_music_downloader/crew.py", # Exclude crew.py file
 
 
 
 
167
  "jio_savan_music_downloader/config/*", # Exclude all files in config folder
168
  "jio_savan_music_downloader/models/*", # Exclude all files in model folder
169
  ]
 
181
  search = 'version = "{current_version}"'
182
  replace = 'version = "{new_version}"'
183
 
 
 
 
 
 
184
  [tool.deptry]
185
  exclude = ["research","artifacts", "notebooks", "tests", "docs", ".venv", "venv", "__pycache__", ".ruff_cache", ".pytest_cache", ".mypy_cache", ".coverage", ".git", "build", "dist", ".github", "site", "config"]
186
+ ignore = ["DEP003"]
187
 
188
  [tool.pydocstyle]
189
+ select = ["D101", "D102"]
190
+
191
+ [tool.isort]
192
+ profile = "black"
193
+ known_third_party = ["requests", "bs4", "pydantic", "crewai_tools"]
194
+ default_section = "THIRDPARTY"
195
+ force_sort_within_sections = true
196
+ line_length = 120
tests/test_api_endpoint.py DELETED
@@ -1,15 +0,0 @@
1
- from starlette.testclient import TestClient
2
- from jio_savan_music_downloader.main import app
3
-
4
- client = TestClient(app=app)
5
-
6
- def test_train_model():
7
- response = client.post("/api/v1/upload-docs")
8
- assert response.status_code == 200
9
- assert response.json() is not None
10
-
11
-
12
- def test_predict():
13
- response = client.post("/api/v1/ask")
14
- assert response.status_code == 200
15
- assert response.json() is not None