From c6b16b9ccf3106db76594198bfc0ed90f448e7ae Mon Sep 17 00:00:00 2001 From: rnsrk Date: Fri, 13 Jan 2023 20:26:04 +0100 Subject: [PATCH] Optimized text cleanup and function parameters --- Main.py | 6 +++--- SentiTooter.py | 10 +++++----- TootCrawler.py | 26 +++++++++++++------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Main.py b/Main.py index 879e487..8fbf829 100644 --- a/Main.py +++ b/Main.py @@ -93,7 +93,7 @@ axes[1].tick_params(which='minor', length=0) plotFileUrl = f'./plots/{TodayDate}.png' plt.savefig(plotFileUrl) -""" -media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the compounds up to {TodayDate}.") + +media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.") mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en') -""" + diff --git a/SentiTooter.py b/SentiTooter.py index 1d89ab6..26b7a47 100644 --- a/SentiTooter.py +++ b/SentiTooter.py @@ -28,14 +28,14 @@ class SentiTooter: self.labels = ['negative', 'neutral', 'positive'] self.sia = SentimentIntensityAnalyzer() - def analyze(self, toot): - match toot.language: + def analyze(self, language, content): + match language: case 'de': - sentiment = self.deModel.predict_sentiment([toot.content]) + sentiment = self.deModel.predict_sentiment([content]) sentiment.append('germanSentiment') return sentiment case 'en': - text = preprocess(toot.content) + text = preprocess(content) encoded_input = self.enTokenizer(text, return_tensors='pt') output = self.enModel(**encoded_input) scores = output[0][0].detach().numpy() @@ -45,7 +45,7 @@ class SentiTooter: sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment'] return sentiment case _: - compound = self.sia.polarity_scores(toot.content)['compound'] + compound = self.sia.polarity_scores(content)['compound'] if compound > (1 / 3): return ['positive', 'vaderSentiment'] elif compound < (-1 / 3): diff --git a/TootCrawler.py b/TootCrawler.py index 5d51b75..1b081c2 100644 --- a/TootCrawler.py +++ b/TootCrawler.py @@ -31,18 +31,18 @@ class TootCrawler(): timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination) for i in allTimelineResults: content = self.cleanhtml(i.content) - sentiment = self.sentiTooter.analyze(i) - toots.append( - { - "sentiment": sentiment[0], - "model": sentiment[1], - "userName": i.account.display_name, - "userId": i.account.id, - "toot": content, - "datetime": i.created_at.astimezone(self.localTimezone), - "language": detect(content), - "tootId": i.id - } - ) + language = detect(content) + sentiment = self.sentiTooter.analyze(language, content) + toot = { + "sentiment": sentiment[0], + "model": sentiment[1], + "toot": content, + "datetime": i.created_at.astimezone(self.localTimezone), + "language": language, + "userName": i.account.display_name, + "userId": i.account.id, + "tootId": i.id + } + toots.append(toot) toots.sort(key=lambda item:item.get('datetime')) return pd.DataFrame.from_records(toots) \ No newline at end of file