Optimized text cleanup and function parameters

This commit is contained in:
rnsrk 2023-01-13 20:26:04 +01:00
parent a20f7331bb
commit c6b16b9ccf
3 changed files with 21 additions and 21 deletions

View file

@ -93,7 +93,7 @@ axes[1].tick_params(which='minor', length=0)
plotFileUrl = f'./plots/{TodayDate}.png' plotFileUrl = f'./plots/{TodayDate}.png'
plt.savefig(plotFileUrl) plt.savefig(plotFileUrl)
"""
media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the compounds up to {TodayDate}.") media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en') mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
"""

View file

@ -28,14 +28,14 @@ class SentiTooter:
self.labels = ['negative', 'neutral', 'positive'] self.labels = ['negative', 'neutral', 'positive']
self.sia = SentimentIntensityAnalyzer() self.sia = SentimentIntensityAnalyzer()
def analyze(self, toot): def analyze(self, language, content):
match toot.language: match language:
case 'de': case 'de':
sentiment = self.deModel.predict_sentiment([toot.content]) sentiment = self.deModel.predict_sentiment([content])
sentiment.append('germanSentiment') sentiment.append('germanSentiment')
return sentiment return sentiment
case 'en': case 'en':
text = preprocess(toot.content) text = preprocess(content)
encoded_input = self.enTokenizer(text, return_tensors='pt') encoded_input = self.enTokenizer(text, return_tensors='pt')
output = self.enModel(**encoded_input) output = self.enModel(**encoded_input)
scores = output[0][0].detach().numpy() scores = output[0][0].detach().numpy()
@ -45,7 +45,7 @@ class SentiTooter:
sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment'] sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment']
return sentiment return sentiment
case _: case _:
compound = self.sia.polarity_scores(toot.content)['compound'] compound = self.sia.polarity_scores(content)['compound']
if compound > (1 / 3): if compound > (1 / 3):
return ['positive', 'vaderSentiment'] return ['positive', 'vaderSentiment']
elif compound < (-1 / 3): elif compound < (-1 / 3):

View file

@ -31,18 +31,18 @@ class TootCrawler():
timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination) timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
for i in allTimelineResults: for i in allTimelineResults:
content = self.cleanhtml(i.content) content = self.cleanhtml(i.content)
sentiment = self.sentiTooter.analyze(i) language = detect(content)
toots.append( sentiment = self.sentiTooter.analyze(language, content)
{ toot = {
"sentiment": sentiment[0], "sentiment": sentiment[0],
"model": sentiment[1], "model": sentiment[1],
"userName": i.account.display_name, "toot": content,
"userId": i.account.id, "datetime": i.created_at.astimezone(self.localTimezone),
"toot": content, "language": language,
"datetime": i.created_at.astimezone(self.localTimezone), "userName": i.account.display_name,
"language": detect(content), "userId": i.account.id,
"tootId": i.id "tootId": i.id
} }
) toots.append(toot)
toots.sort(key=lambda item:item.get('datetime')) toots.sort(key=lambda item:item.get('datetime'))
return pd.DataFrame.from_records(toots) return pd.DataFrame.from_records(toots)