From fae306916f32d55e3f47d239fd3fbfd3a8a9c50a Mon Sep 17 00:00:00 2001 From: rnsrk Date: Tue, 24 Jan 2023 21:23:37 +0100 Subject: [PATCH] implement scores for roberta and germanSentiment --- Main.py | 6 +++--- SentiTooter.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Main.py b/Main.py index b80b3a6..a64a63c 100644 --- a/Main.py +++ b/Main.py @@ -25,7 +25,7 @@ crudManager = CRUDManager() lastTootId = crudManager.getLastToot() tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId) - +exit() if not tootsDataframe.empty: crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False) else: @@ -93,6 +93,6 @@ axes[1].tick_params(which='minor', length=0) plotFileUrl = f'./plots/{TodayDate}.png' plt.savefig(plotFileUrl) -media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.") -mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en') +#media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.") +#mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en') diff --git a/SentiTooter.py b/SentiTooter.py index 26b7a47..9626078 100644 --- a/SentiTooter.py +++ b/SentiTooter.py @@ -31,21 +31,25 @@ class SentiTooter: def analyze(self, language, content): match language: case 'de': - sentiment = self.deModel.predict_sentiment([content]) - sentiment.append('germanSentiment') - return sentiment + sentimentList, probabilitiesList = self.deModel.predict_sentiment([content], output_probabilities=True) + sentiment = sentimentList[0] + score = {i[0]: i[1] for i in probabilitiesList[0]}[sentiment] + return [sentiment, 'germanSentiment', score] case 'en': text = preprocess(content) encoded_input = self.enTokenizer(text, return_tensors='pt') output = self.enModel(**encoded_input) scores = output[0][0].detach().numpy() scores = softmax(scores) + print(scores) sentimentIndexWithMaxScore = np.argmax(scores) sentimentLabel = self.labels[sentimentIndexWithMaxScore] - sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment'] + sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment', max(scores)] + print(sentiment) return sentiment case _: compound = self.sia.polarity_scores(content)['compound'] + print(self.sia.polarity_scores(content), 'vaderSentiment') if compound > (1 / 3): return ['positive', 'vaderSentiment'] elif compound < (-1 / 3):