Optimized text cleanup and function parameters

2023-01-13 20:26:04 +01:00 · 2023-01-13 20:26:04 +01:00 · c6b16b9ccf
commit c6b16b9ccf
parent a20f7331bb
3 changed files with 21 additions and 21 deletions
--- a/Main.py
+++ b/Main.py
@@ -93,7 +93,7 @@ axes[1].tick_params(which='minor', length=0)
 plotFileUrl = f'./plots/{TodayDate}.png'
 plt.savefig(plotFileUrl)
-"""
+
-media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the compounds up to {TodayDate}.")
+media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
 mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
-"""
+
--- a/SentiTooter.py
+++ b/SentiTooter.py
@@ -28,14 +28,14 @@ class SentiTooter:
        self.labels = ['negative', 'neutral', 'positive']
        self.sia = SentimentIntensityAnalyzer()
-    def analyze(self, toot):
+    def analyze(self, language, content):
-        match toot.language:
+        match language:
            case 'de':
-                sentiment = self.deModel.predict_sentiment([toot.content])
+                sentiment = self.deModel.predict_sentiment([content])
                sentiment.append('germanSentiment')
                return sentiment
            case 'en':
-                text = preprocess(toot.content)
+                text = preprocess(content)
                encoded_input = self.enTokenizer(text, return_tensors='pt')
                output = self.enModel(**encoded_input)
                scores = output[0][0].detach().numpy()
@@ -45,7 +45,7 @@ class SentiTooter:
                sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment']
                return sentiment
            case _:
-                compound = self.sia.polarity_scores(toot.content)['compound']
+                compound = self.sia.polarity_scores(content)['compound']
                if compound > (1 / 3):
                    return ['positive', 'vaderSentiment']
                elif compound < (-1 / 3):
--- a/TootCrawler.py
+++ b/TootCrawler.py
@@ -31,18 +31,18 @@ class TootCrawler():
            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
        for i in allTimelineResults:
            content = self.cleanhtml(i.content)
-            sentiment = self.sentiTooter.analyze(i)
+            language = detect(content)
-            toots.append(
+            sentiment = self.sentiTooter.analyze(language, content)
-                    {
+            toot = {
-                            "sentiment": sentiment[0],
+                "sentiment": sentiment[0],
-                            "model": sentiment[1],
+                "model": sentiment[1],
-                            "userName": i.account.display_name,
+                "toot": content,
-                            "userId": i.account.id,
+                "datetime": i.created_at.astimezone(self.localTimezone),
-                            "toot": content,
+                "language": language,
-                            "datetime": i.created_at.astimezone(self.localTimezone),
+                "userName": i.account.display_name,
-                            "language": detect(content),
+                "userId": i.account.id,
-                            "tootId": i.id
+                "tootId": i.id
-                    }
+            }
-                )
+            toots.append(toot)
        toots.sort(key=lambda item:item.get('datetime'))
        return pd.DataFrame.from_records(toots)