# NOTE(review): excerpt begins mid-class — these are SQLAlchemy column
# definitions for a saved search/analysis record; the `class` header
# (original lines < 45) is outside this excerpt. `id` shadows the builtin,
# but renaming would change the schema/API, so it is left as-is.
id = db.Column(db.Integer, primary_key=True)                             # surrogate primary key
search_term = db.Column(db.String(150), nullable=False)                  # the Reddit search query (required)
subreddit = db.Column(db.String(150), nullable=True)                     # subreddit searched; optional
sort_order = db.Column(db.String(50), nullable=True)                     # post sort order used for the search
time_filter = db.Column(db.String(50), nullable=True)                    # Reddit time filter passed to search
comment_sort_order = db.Column(db.String(50), nullable=True)             # per-post comment sort used
overall_sentiment = db.Column(db.String(50), nullable=False)             # aggregate sentiment label (required)
positive_percentage = db.Column(db.Float, nullable=False)                # share of positive comments; elsewhere treated as 0-100
total_comments = db.Column(db.Integer, nullable=False)                   # number of comments analyzed
created_at = db.Column(db.DateTime, default=get_utc_plus_one)            # row creation time; helper name suggests UTC+1 — confirm
quick_sentiment_summary = db.Column(db.String(150), nullable=True)       # short label extracted from the GPT response
analysis_description = db.Column(db.String(512), nullable=True)          # longer free-text analysis from GPT
sentiment_pie_chart_filename = db.Column(db.String(255), nullable=True)  # static-relative path of the saved pie chart
word_cloud_filename = db.Column(db.String(255), nullable=True)           # static-relative path of the saved word cloud
gpt_prompt_type = db.Column(db.String(100), nullable=True)               # which GPT prompt template was used
def fetch_comments_from_posts(search_term, subreddit='all', sort_order='default', time_filter='all', comment_sort_order='top', max_comments=1000, max_comments_per_post=75):
    """Search a subreddit via PRAW and collect comment bodies from the result posts.

    Returns a ``(comments, error_message)`` tuple (see the final `return`);
    on a PRAW failure the error message reports an unknown subreddit.
    """
    # NOTE(review): original lines 76-79 are missing from this excerpt —
    # presumably they initialise `comments`, `comment_count`, `error_message`
    # and open the `try:` whose `except` appears below. Confirm before editing.
    search_query = {"time_filter": time_filter}        # kwargs forwarded to subreddit.search()
    if sort_order != 'default':
        search_query["sort"] = sort_order              # only override PRAW's default sort when asked to
    # NOTE(review): original lines 83-84 missing.
    search_results = reddit.subreddit(subreddit).search(search_term, **search_query)
    # NOTE(review): original line 86 missing.
    for submission in search_results:
        if comment_count >= max_comments:
            # NOTE(review): original line 89 missing (presumably `break`).
        submission.comment_sort = comment_sort_order   # set sort before expanding the comment forest
        submission.comments.replace_more(limit=1)      # expand at most one "MoreComments" stub
        for comment in submission.comments.list():
            if comment_count >= max_comments:
                # NOTE(review): original line 94 missing (presumably `break`).
            comments.append(comment.body)
            # NOTE(review): original line 96 missing (presumably increments `comment_count`).
            if comment_count % max_comments_per_post == 0:
                # NOTE(review): original line 98 missing (presumably `break`,
                # capping comments taken from a single post).
    except Exception as e:
        # Broad catch; the message assumes the failure is a bad subreddit name.
        error_message = "The subreddit you entered could not be found. Please check the name and try again."
        # NOTE(review): original line 101 missing.
    return comments, error_message
# NOTE(review): excerpt resumes mid-function — the enclosing `def` and the
# opening of the `prompt_files = {` dict (original lines <= 183) are missing.
# `max_tokens`, `safe_margin`, `comments`, `prompt_type` and `total_tokens`
# are presumably parameters/locals defined in the missing lines — confirm.
# This builds a token-budgeted prompt: template text followed by numbered
# comments, trimmed until it fits the model's context window.
    'default': 'data/gpt_prompt.txt',        # fallback template (also used for unknown types below)
    'contextual': 'data/gpt_contextual.txt',
    'emotional': 'data/gpt_emotional.txt',
    'comparative': 'data/gpt_comparative.txt',
    'impact': 'data/gpt_impact.txt',
    'ai_generated': 'data/gpt_ai_generated.txt'
# NOTE(review): original line 190 missing (presumably `}` closing the dict).
prompt_file = prompt_files.get(prompt_type, 'data/gpt_prompt.txt')  # unknown types fall back to default
# NOTE(review): original lines 192-193 missing.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")  # used only to count tokens against the budget
# NOTE(review): original lines 195-196 missing.
prompt_path = os.path.join(os.path.dirname(__file__), prompt_file)  # resolve template relative to this module
with open(prompt_path, 'r') as file:
    prompt_text = file.read()
# NOTE(review): original lines 200-203 missing.
prompt_tokens = tokenizer.encode(prompt_text, add_special_tokens=True)
# NOTE(review): original lines 205-206 missing.
max_comment_tokens = max_tokens - len(prompt_tokens) - safe_margin  # token budget left for the comments
# NOTE(review): original line 208 missing.
prepared_comments = []
# NOTE(review): original lines 210-211 missing (presumably `total_tokens = 0`).
for comment in comments:
    comment_tokens = tokenizer.encode(comment, add_special_tokens=True)
    if total_tokens + len(comment_tokens) > max_comment_tokens:
        # NOTE(review): original line 215 missing (presumably `break`).
    prepared_comments.append(comment)
    total_tokens += len(comment_tokens)
# NOTE(review): original lines 218-219 missing.
# Number each kept comment (1-based) and prepend the template text.
combined_text = f"{prompt_text}\n\n" + '\n'.join([f"{idx + 1}: {comment}" for idx, comment in enumerate(prepared_comments)])
combined_tokens = tokenizer.encode(combined_text, add_special_tokens=True)
# NOTE(review): original lines 222-223 missing.
# Safety pass: drop comments from the end until the whole prompt fits.
while len(combined_tokens) > max_tokens - safe_margin:
    if not prepared_comments:
        # NOTE(review): original line 226 missing (presumably `break`).
    prepared_comments.pop()
    combined_text = f"{prompt_text}\n\n" + '\n'.join([f"{idx + 1}: {comment}" for idx, comment in enumerate(prepared_comments)])
    combined_tokens = tokenizer.encode(combined_text, add_special_tokens=True)
# NOTE(review): original line 230 missing.
return combined_text, len(prepared_comments)
# NOTE(review): excerpt resumes mid-function — the enclosing `def` (original
# lines ~232-236) is missing. This sends `combined_text` to the OpenAI chat
# API and extracts a sentiment label plus description from the response;
# returns `(quick_sentiment_summary, analysis_description)`.
if used_comments == 0:
    # Nothing fit within the token budget — return an error pair instead of calling the API.
    return "Error", "No comments were analyzed due to token limit restrictions."
# NOTE(review): original lines 239-240 missing (presumably `try:`).
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    # NOTE(review): original line 243 missing (presumably `messages=[`).
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": combined_text}
# NOTE(review): original lines 246-250 missing (presumably the closing
# brackets of the `messages` list and the `create(...)` call).
full_response = response["choices"][0]["message"]["content"].strip()
# NOTE(review): original lines 252-253 missing.
if prompt_type == 'ai_generated':
    # AI-generated prompts don't follow the labelled-response format, so no label is parsed.
    # NOTE(review): original line 255 missing.
    quick_sentiment_summary = "No label due to AI-generated prompt"
    # NOTE(review): original lines 257-259 missing.
# Pull the "Overall Sentiment: <label>" line out of the free-text response.
label_match = re.search(
    r"Overall Sentiment:\s*(Highly Negative|Very Negative|Mostly Negative|Neutral|Mostly Positive|Very Positive|Highly Positive)", full_response, re.IGNORECASE)
# NOTE(review): original line 261 missing (presumably `if label_match:`).
quick_sentiment_summary = label_match.group(1).strip()
# Remove the matched label line so the remainder is just the description.
analysis_description_raw = full_response.replace(label_match.group(0), '').strip()
# NOTE(review): original lines 264-265 missing (presumably the `else:` branch header).
quick_sentiment_summary = "Label not found"
analysis_description = "Analysis description not found due to an unexpected response format."
# NOTE(review): original line 268 missing.
except Exception as e:
    print(f"An error occurred: {e}")
    quick_sentiment_summary = "Error"
    analysis_description = "Could not analyze sentiment due to an error."
# NOTE(review): original line 273 missing.
return quick_sentiment_summary, analysis_description
# NOTE(review): excerpt resumes mid-function — the enclosing `def` (original
# lines ~275-278, presumably taking `positive_percentage` and `image_path`)
# is missing. Renders a two-slice positive/negative pie chart to `image_path`.
labels = ['Positive', 'Negative']
sizes = [positive_percentage, 100 - positive_percentage]  # treats positive_percentage as 0-100
colors = ['#ff9999', '#66b3ff']
# NOTE(review): original line 282 missing.
plt.figure(figsize=(5, 5))
plt.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=140)
# NOTE(review): original line 285 missing (presumably `plt.axis('equal')` — confirm).
plt.savefig(image_path)
# NOTE(review): original lines 287+ missing (presumably `plt.close()` to free the figure).
# NOTE(review): excerpt resumes mid-function — the enclosing `def` (original
# lines ~288-289, presumably taking `comments` and `image_path`) is missing.
# Renders a word cloud of all comment text to `image_path`.
text = ' '.join(comments)  # WordCloud.generate expects one flat string
wordcloud = WordCloud(stopwords=STOPWORDS, background_color='white', max_words=100, width=800, height=400).generate(text)
# NOTE(review): original line 292 missing.
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
# NOTE(review): original line 295 missing (presumably `plt.axis('off')` — confirm).
plt.savefig(image_path)
# NOTE(review): subsequent lines (297+) missing (presumably `plt.close()`).
# NOTE(review): excerpt resumes mid-function — original lines 297-302 are
# missing, including the `def` (with `search_term`, `subreddit`, `limit`
# parameters, presumably), a `comments = []` initialiser and the `try:` whose
# `except` appears below. Simpler fetch: top comments from up to 10 posts.
for submission in reddit.subreddit(subreddit).search(search_term, limit=10):
    submission.comment_sort = 'top'
    submission.comments.replace_more(limit=0)  # drop "MoreComments" stubs entirely
    for comment in submission.comments.list():
        if len(comments) < limit:
            comments.append(comment.body)
        # NOTE(review): original lines 309-310 missing.
    if len(comments) >= limit:
        # NOTE(review): original line 312 missing (presumably `break`).
except Exception as e:
    print(f"An error occurred while fetching comments: {str(e)}")
# NOTE(review): original lines 315+ missing (presumably `return comments`).
@app.route('/analysis', methods=['GET', 'POST'])
# NOTE(review): original line 320 — the `def` for this view — is missing from
# this excerpt. On POST it fetches comments, runs sentiment analysis, renders
# charts, persists a SearchHistory row, and re-renders index.html; on GET it
# renders the empty form.
if request.method == 'POST':
    # Pull form fields; `or` substitutes the default when the field is
    # present but empty (empty string is falsy).
    search_term = request.form.get('search_term')
    prompt_type = request.form.get('prompt_type') or 'default'
    subreddit = request.form.get('subreddit') or 'all'
    sort_order = request.form.get('sort_order') or 'default'
    time_filter = request.form.get('time_filter') or 'all'
    comment_sort_order = request.form.get('comment_sort_order') or 'top'
    # NOTE(review): original lines 328-331 missing.
    sentiment_pie_chart_filename = None  # filled in only if charts are generated below
    word_cloud_filename = None
    # NOTE(review): original lines 334-335 missing (presumably the comment
    # fetch call and an `if error_message:` guard before this early return).
    return render_template('index.html', error_message=error_message)
    # NOTE(review): original lines 337-338 missing (presumably `if not comments:`).
    error_message = "Sorry, we couldn't find any comments related to your search. Please try a different search term or subreddit."
    return render_template('index.html', error_message=error_message)
    # NOTE(review): original lines 341-343 missing.
    preprocessed_comments = [preprocess_text(comment) for comment in comments]
    # NOTE(review): original lines 345-346 missing (presumably per-comment
    # sentiment classification producing `sentiments` and `overall_sentiment`).
    positive_count = sentiments.count('Positive')
    total_comments = len(sentiments)
    positive_percentage = (positive_count / total_comments * 100) if total_comments else 0  # guard divide-by-zero
    # NOTE(review): original lines 350-354 missing.
    # Unique filenames so repeated searches don't overwrite each other's images.
    sentiment_pie_chart_filename = f"images/sentiment_pie_{uuid4()}.png"
    word_cloud_filename = f"images/word_cloud_{uuid4()}.png"
    # NOTE(review): original lines 357-358 missing.
    sentiment_pie_chart_path = os.path.join(app.static_folder, sentiment_pie_chart_filename)
    word_cloud_path = os.path.join(app.static_folder, word_cloud_filename)
    # NOTE(review): original lines 361-365 missing (presumably chart rendering
    # calls and the opening of `new_search = SearchHistory(`).
    search_term=search_term,
    # NOTE(review): original line 367 missing (presumably `subreddit=subreddit,`).
    sort_order=sort_order,
    time_filter=time_filter,
    comment_sort_order=comment_sort_order,
    overall_sentiment=overall_sentiment,
    positive_percentage=positive_percentage,
    total_comments=total_comments,
    quick_sentiment_summary=quick_sentiment_summary,
    analysis_description=analysis_description,
    sentiment_pie_chart_filename=sentiment_pie_chart_filename,
    word_cloud_filename=word_cloud_filename,
    gpt_prompt_type=prompt_type
    # NOTE(review): original line 379 missing (presumably `)` closing the constructor).
    db.session.add(new_search)
    # NOTE(review): original lines 381-382 missing (presumably `db.session.commit()`).
    return render_template('index.html', search_term=search_term, subreddit=subreddit, sort_order=sort_order, time_filter=time_filter, comment_sort_order=comment_sort_order, overall_sentiment=overall_sentiment, positive_percentage=positive_percentage, total_comments=total_comments, quick_sentiment_summary=quick_sentiment_summary, analysis_description=analysis_description, sentiment_pie_chart_filename=sentiment_pie_chart_filename, word_cloud_filename=word_cloud_filename, prompt_type=prompt_type)
# NOTE(review): original line 384 missing.
# GET request: render the bare search form.
return render_template('index.html')
387@app.route('/generate_prompt', methods=['POST'])
# NOTE(review): excerpt resumes mid-view — the route decorator and `def`
# (original lines ~430-433) are missing. Lists saved SearchHistory rows with
# an optional case-insensitive search-term filter and a sort order, both read
# from the query string.
sort_order = request.args.get('sort', 'newest')  # default: newest first
search_term = request.args.get('search')
# NOTE(review): original line 436 missing.
query = SearchHistory.query
# NOTE(review): original lines 438-440 missing (presumably `if search_term:`
# guarding the two filter lines below).
search_term = search_term.lower()
query = query.filter(func.lower(SearchHistory.search_term) == search_term)  # case-insensitive exact match
# NOTE(review): original line 443 missing.
if sort_order == 'newest':
    query = query.order_by(SearchHistory.created_at.desc())
elif sort_order == 'oldest':
    query = query.order_by(SearchHistory.created_at)
elif sort_order == 'sentiment_asc':
    # Sorts alphabetically on the label string, not by sentiment strength.
    query = query.order_by(SearchHistory.overall_sentiment)
elif sort_order == 'sentiment_desc':
    query = query.order_by(SearchHistory.overall_sentiment.desc())
# NOTE(review): original line 452 missing.
searches = query.all()
# NOTE(review): original line 454 missing.
if not searches and search_term:
    flash('No results found for your search term. Please try a different one.', 'info')
# NOTE(review): original line 457 missing.
return render_template('history.html', searches=searches)
# NOTE(review): stray module-level call — it mirrors the function's own
# signature, would execute at import time, and references an undefined
# `search_term`; almost certainly an extraction artifact rather than
# intentional code. Confirm against the real file before keeping.
fetch_comments_from_posts(search_term, subreddit='all', sort_order='default', time_filter='all', comment_sort_order='top', max_comments=1000, max_comments_per_post=75)