Spaces:
Sleeping
Sleeping
Add text analysis tool and update project dependencies
- Introduced new text_analysis.py tool with comprehensive text analysis capabilities
- Updated requirements.txt with new dependencies for text analysis and project maintenance
- Integrated analyze_text tool into the CodeAgent in app.py
- app.py +2 -1
- requirements.txt +24 -5
- tools/text_analysis.py +86 -0
app.py
CHANGED
@@ -4,6 +4,7 @@ import requests
|
|
4 |
import pytz
|
5 |
import yaml
|
6 |
from tools.final_answer import FinalAnswerTool
|
|
|
7 |
|
8 |
from Gradio_UI import GradioUI
|
9 |
|
@@ -55,7 +56,7 @@ with open("prompts.yaml", "r") as stream:
|
|
55 |
|
56 |
agent = CodeAgent(
|
57 |
model=model,
|
58 |
-
tools=[final_answer],
|
59 |
max_steps=6,
|
60 |
verbosity_level=1,
|
61 |
grammar=None,
|
|
|
4 |
import pytz
|
5 |
import yaml
|
6 |
from tools.final_answer import FinalAnswerTool
|
7 |
+
from tools.text_analysis import analyze_text
|
8 |
|
9 |
from Gradio_UI import GradioUI
|
10 |
|
|
|
56 |
|
57 |
agent = CodeAgent(
|
58 |
model=model,
|
59 |
+
tools=[final_answer, analyze_text], # Added analyze_text tool
|
60 |
max_steps=6,
|
61 |
verbosity_level=1,
|
62 |
grammar=None,
|
requirements.txt
CHANGED
@@ -1,5 +1,24 @@
|
|
1 |
-
|
2 |
-
smolagents
|
3 |
-
|
4 |
-
|
5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Core dependencies
|
2 |
+
smolagents>=0.1.0
|
3 |
+
gradio>=4.0.0
|
4 |
+
pyyaml>=6.0.1
|
5 |
+
pytz>=2024.1
|
6 |
+
requests>=2.31.0
|
7 |
+
|
8 |
+
# For text analysis and NLP
|
9 |
+
textblob>=0.17.1
|
10 |
+
nltk>=3.8.1
|
11 |
+
|
12 |
+
# For web scraping and URL processing
|
13 |
+
beautifulsoup4>=4.12.3
|
14 |
+
validators>=0.22.0
|
15 |
+
|
16 |
+
# For data visualization
|
17 |
+
matplotlib>=3.8.2
|
18 |
+
numpy>=1.26.4
|
19 |
+
|
20 |
+
# Development dependencies
|
21 |
+
pytest>=8.0.0
|
22 |
+
black>=24.1.1
|
23 |
+
isort>=5.13.2
|
24 |
+
mypy>=1.8.0
|
tools/text_analysis.py
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Dict, List, Union
|
2 |
+
import logging
|
3 |
+
from textblob import TextBlob
|
4 |
+
import nltk
|
5 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
6 |
+
from nltk.corpus import stopwords
|
7 |
+
from smolagents import tool
|
8 |
+
|
9 |
+
# Set up logging
logger = logging.getLogger(__name__)

# NLTK data fetched at import time so the analysis tool can run on a fresh
# environment:
#   - punkt / punkt_tab: tokenizer models behind sent_tokenize/word_tokenize
#     (recent NLTK releases load "punkt_tab" instead of the pickled "punkt")
#   - stopwords: the English stopword list
#   - averaged_perceptron_tagger: POS tagger model (kept from the original list)
#   - brown: corpus TextBlob's default noun-phrase extractor is trained on
_NLTK_RESOURCES = (
    'punkt',
    'punkt_tab',
    'stopwords',
    'averaged_perceptron_tagger',
    'brown',
)

# nltk.download() signals most failures (e.g. no network) by returning False
# rather than raising, so the return value is checked as well. Each resource
# is attempted independently so one failure does not skip the others.
# Failures are only logged: importing this module must stay best-effort.
for _resource in _NLTK_RESOURCES:
    try:
        if not nltk.download(_resource, quiet=True):
            logger.error(f"Failed to download NLTK data: {_resource}")
    except Exception as e:
        logger.error(f"Failed to download NLTK data: {e}")
|
19 |
+
|
20 |
+
@tool
def analyze_text(text: str) -> Dict[str, Union[str, float, List[str]]]:
    """Performs comprehensive text analysis including sentiment, readability, and key phrases.

    Empty text, or text made up only of stopwords and punctuation, is analyzed
    without failing; any average that cannot be computed is reported as 0.0.
    If the analysis itself raises, the failure is logged and a zeroed payload
    carrying an extra "error" message is returned instead of propagating.

    Args:
        text: The input text to analyze

    Returns:
        Dict containing analysis results including:
        - sentiment: Dict with polarity and subjectivity scores plus a sentiment_label
        - key_phrases: List of up to 5 distinct noun phrases, in order of first appearance
        - readability: Dict with score, avg_sentence_length and avg_word_length
        - summary: Dict with sentence_count, word_count and unique_words
    """
    try:
        blob = TextBlob(text)

        # Sentiment analysis. The label is derived from the unrounded polarity
        # so a small non-zero value is not turned into "neutral" by rounding.
        polarity = blob.sentiment.polarity
        subjectivity = blob.sentiment.subjectivity
        if polarity > 0:
            sentiment_label = "positive"
        elif polarity < 0:
            sentiment_label = "negative"
        else:
            sentiment_label = "neutral"
        sentiment = {
            "polarity": round(polarity, 2),
            "subjectivity": round(subjectivity, 2),
            "sentiment_label": sentiment_label,
        }

        # Key phrases: de-duplicate while keeping first-seen order, so the
        # five phrases returned are the same on every run (slicing a set
        # picks an arbitrary five).
        key_phrases = list(dict.fromkeys(str(phrase) for phrase in blob.noun_phrases))[:5]

        # Basic text statistics.
        # NOTE(review): word_tokenize also yields punctuation tokens, so
        # word_count and avg_sentence_length include them - confirm intended.
        sentences = sent_tokenize(text)
        words = word_tokenize(text)

        # Load the stopword list once; a set gives O(1) membership tests.
        english_stopwords = set(stopwords.words('english'))
        words_no_stop = [
            token.lower()
            for token in words
            if token.isalnum() and token.lower() not in english_stopwords
        ]

        # Averages fall back to 0.0 when their denominator is empty, instead
        # of raising ZeroDivisionError and discarding the whole analysis.
        avg_sentence_length = len(words) / len(sentences) if sentences else 0.0
        avg_word_length = (
            sum(len(word) for word in words_no_stop) / len(words_no_stop)
            if words_no_stop
            else 0.0
        )

        # NOTE(review): the coefficients are those of the Flesch-Kincaid grade
        # formula, which expects syllables per word; characters per content
        # word are used here, so the score is only a rough indicator.
        if sentences and words_no_stop:
            readability_score = round(
                (avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59, 1
            )
        else:
            # No meaningful score without both averages.
            readability_score = 0.0

        return {
            "sentiment": sentiment,
            "key_phrases": key_phrases,
            "readability": {
                "score": readability_score,
                "avg_sentence_length": round(avg_sentence_length, 1),
                "avg_word_length": round(avg_word_length, 1),
            },
            "summary": {
                "sentence_count": len(sentences),
                "word_count": len(words),
                "unique_words": len(set(words_no_stop)),
            },
        }

    except Exception as e:
        # Tool boundary: never propagate to the agent. Log with traceback and
        # return a payload with the same shape as a successful result.
        logger.exception("Error in text analysis: %s", e)
        return {
            "error": f"Analysis failed: {str(e)}",
            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
            "key_phrases": [],
            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0},
        }
|