strickvl committed on
Commit
4e257c7
·
unverified ·
1 Parent(s): 08c62ca

Add text analysis tool and update project dependencies

Browse files

- Introduced new text_analysis.py tool with comprehensive text analysis capabilities
- Updated requirements.txt with new dependencies for text analysis and project maintenance
- Integrated analyze_text tool into the CodeAgent in app.py

Files changed (3) hide show
  1. app.py +2 -1
  2. requirements.txt +24 -5
  3. tools/text_analysis.py +86 -0
app.py CHANGED
@@ -4,6 +4,7 @@ import requests
4
  import pytz
5
  import yaml
6
  from tools.final_answer import FinalAnswerTool
 
7
 
8
  from Gradio_UI import GradioUI
9
 
@@ -55,7 +56,7 @@ with open("prompts.yaml", "r") as stream:
55
 
56
  agent = CodeAgent(
57
  model=model,
58
- tools=[final_answer], ## add your tools here (don't remove final answer)
59
  max_steps=6,
60
  verbosity_level=1,
61
  grammar=None,
 
4
  import pytz
5
  import yaml
6
  from tools.final_answer import FinalAnswerTool
7
+ from tools.text_analysis import analyze_text
8
 
9
  from Gradio_UI import GradioUI
10
 
 
56
 
57
  agent = CodeAgent(
58
  model=model,
59
+ tools=[final_answer, analyze_text], # Added analyze_text tool
60
  max_steps=6,
61
  verbosity_level=1,
62
  grammar=None,
requirements.txt CHANGED
@@ -1,5 +1,24 @@
1
- markdownify
2
- smolagents
3
- requests
4
- duckduckgo_search
5
- pandas
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ smolagents>=0.1.0
3
+ gradio>=4.0.0
4
+ pyyaml>=6.0.1
5
+ pytz>=2024.1
6
+ requests>=2.31.0
7
+
8
+ # For text analysis and NLP
9
+ textblob>=0.17.1
10
+ nltk>=3.8.1
11
+
12
+ # For web scraping and URL processing
13
+ beautifulsoup4>=4.12.3
14
+ validators>=0.22.0
15
+
16
+ # For data visualization
17
+ matplotlib>=3.8.2
18
+ numpy>=1.26.4
19
+
20
+ # Development dependencies
21
+ pytest>=8.0.0
22
+ black>=24.1.1
23
+ isort>=5.13.2
24
+ mypy>=1.8.0
tools/text_analysis.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Union
2
+ import logging
3
+ from textblob import TextBlob
4
+ import nltk
5
+ from nltk.tokenize import sent_tokenize, word_tokenize
6
+ from nltk.corpus import stopwords
7
+ from smolagents import tool
8
+
9
+ # Set up logging
10
+ logger = logging.getLogger(__name__)
11
+
12
# Download required NLTK data.
#
# nltk.download() signals most failures (no network, unknown resource id) by
# returning False instead of raising, because raise_on_error defaults to
# False. The return value is therefore checked in addition to catching
# exceptions. Each resource is fetched independently so that one failure does
# not prevent the remaining downloads from being attempted.
for _resource in (
    'punkt',
    # Tokenizer tables that newer NLTK releases load for sent_tokenize /
    # word_tokenize in place of the pickled 'punkt' models.
    'punkt_tab',
    'stopwords',
    'averaged_perceptron_tagger',
):
    try:
        if not nltk.download(_resource, quiet=True):
            logger.error(f"Failed to download NLTK data: {_resource}")
    except Exception as e:
        logger.error(f"Failed to download NLTK data: {e}")
19
+
20
@tool
def analyze_text(text: str) -> Dict[str, Union[str, float, List[str]]]:
    """Performs comprehensive text analysis including sentiment, readability, and key phrases.

    Args:
        text: The input text to analyze

    Returns:
        Dict containing analysis results including:
        - sentiment: Dict with polarity and subjectivity scores (rounded to two
          decimals) and a sentiment_label of "positive", "negative" or "neutral"
        - key_phrases: List of up to five distinct noun phrases, in order of
          first appearance
        - readability: Dict with score, avg_sentence_length and avg_word_length;
          all are 0 when the text has no sentences or no non-stopword words
        - summary: Dict with sentence_count, word_count and unique_words
        If the analysis itself fails, the same keys are returned with zeroed
        values, sentiment_label "error", and an extra "error" message.
    """
    try:
        blob = TextBlob(text)

        # Read the scores once so the rounded value and the label are always
        # derived from the same number.
        sentiment_scores = blob.sentiment
        polarity = sentiment_scores.polarity
        if polarity > 0:
            sentiment_label = "positive"
        elif polarity < 0:
            sentiment_label = "negative"
        else:
            sentiment_label = "neutral"
        sentiment = {
            "polarity": round(polarity, 2),
            "subjectivity": round(sentiment_scores.subjectivity, 2),
            "sentiment_label": sentiment_label,
        }

        # dict.fromkeys de-duplicates while keeping first-occurrence order.
        # Slicing a set instead would pick an arbitrary five phrases that can
        # differ between runs, because string hashing is randomized.
        key_phrases = list(
            dict.fromkeys(str(phrase) for phrase in blob.noun_phrases)
        )[:5]

        # Basic text statistics
        sentences = sent_tokenize(text)
        words = word_tokenize(text)
        # Build the stopword set once: looking the list up inside the
        # comprehension would repeat the call and a linear scan per token.
        english_stopwords = set(stopwords.words('english'))
        words_no_stop = [
            word.lower()
            for word in words
            if word.isalnum() and word.lower() not in english_stopwords
        ]

        # Guard both denominators: empty text yields no sentences, and text
        # made only of stopwords/punctuation yields no content words. Without
        # the guards a ZeroDivisionError would discard the valid sentiment and
        # summary results computed above.
        avg_sentence_length = len(words) / len(sentences) if sentences else 0.0
        avg_word_length = (
            sum(len(word) for word in words_no_stop) / len(words_no_stop)
            if words_no_stop
            else 0.0
        )

        # NOTE: the coefficients are those of the Flesch-Kincaid grade-level
        # formula, but characters per word stand in for syllables per word,
        # so this is only a rough proxy and not a true grade level.
        if sentences and words_no_stop:
            readability_score = round(
                (avg_sentence_length * 0.39) + (avg_word_length * 11.8) - 15.59,
                1,
            )
        else:
            # Nothing measurable to score; report 0 rather than the formula's
            # meaningless constant offset.
            readability_score = 0.0

        return {
            "sentiment": sentiment,
            "key_phrases": key_phrases,
            "readability": {
                "score": readability_score,
                "avg_sentence_length": round(avg_sentence_length, 1),
                "avg_word_length": round(avg_word_length, 1),
            },
            "summary": {
                "sentence_count": len(sentences),
                "word_count": len(words),
                "unique_words": len(set(words_no_stop)),
            },
        }

    except Exception as e:
        # Deliberately broad: the function returns a well-formed error payload
        # to its caller instead of raising. logger.exception keeps the
        # traceback so the underlying failure is still diagnosable.
        logger.exception("Error in text analysis: %s", e)
        return {
            "error": f"Analysis failed: {str(e)}",
            "sentiment": {"polarity": 0, "subjectivity": 0, "sentiment_label": "error"},
            "key_phrases": [],
            "readability": {"score": 0, "avg_sentence_length": 0, "avg_word_length": 0},
            "summary": {"sentence_count": 0, "word_count": 0, "unique_words": 0},
        }