Optimized text cleanup and function parameters
This commit is contained in:
parent
a20f7331bb
commit
c6b16b9ccf
3 changed files with 21 additions and 21 deletions
6
Main.py
6
Main.py
|
|
@ -93,7 +93,7 @@ axes[1].tick_params(which='minor', length=0)
|
||||||
plotFileUrl = f'./plots/{TodayDate}.png'
|
plotFileUrl = f'./plots/{TodayDate}.png'
|
||||||
plt.savefig(plotFileUrl)
|
plt.savefig(plotFileUrl)
|
||||||
|
|
||||||
"""
|
|
||||||
media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the compounds up to {TodayDate}.")
|
media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
|
||||||
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
|
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
|
||||||
"""
|
|
||||||
|
|
|
||||||
|
|
@ -28,14 +28,14 @@ class SentiTooter:
|
||||||
self.labels = ['negative', 'neutral', 'positive']
|
self.labels = ['negative', 'neutral', 'positive']
|
||||||
self.sia = SentimentIntensityAnalyzer()
|
self.sia = SentimentIntensityAnalyzer()
|
||||||
|
|
||||||
def analyze(self, toot):
|
def analyze(self, language, content):
|
||||||
match toot.language:
|
match language:
|
||||||
case 'de':
|
case 'de':
|
||||||
sentiment = self.deModel.predict_sentiment([toot.content])
|
sentiment = self.deModel.predict_sentiment([content])
|
||||||
sentiment.append('germanSentiment')
|
sentiment.append('germanSentiment')
|
||||||
return sentiment
|
return sentiment
|
||||||
case 'en':
|
case 'en':
|
||||||
text = preprocess(toot.content)
|
text = preprocess(content)
|
||||||
encoded_input = self.enTokenizer(text, return_tensors='pt')
|
encoded_input = self.enTokenizer(text, return_tensors='pt')
|
||||||
output = self.enModel(**encoded_input)
|
output = self.enModel(**encoded_input)
|
||||||
scores = output[0][0].detach().numpy()
|
scores = output[0][0].detach().numpy()
|
||||||
|
|
@ -45,7 +45,7 @@ class SentiTooter:
|
||||||
sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment']
|
sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment']
|
||||||
return sentiment
|
return sentiment
|
||||||
case _:
|
case _:
|
||||||
compound = self.sia.polarity_scores(toot.content)['compound']
|
compound = self.sia.polarity_scores(content)['compound']
|
||||||
if compound > (1 / 3):
|
if compound > (1 / 3):
|
||||||
return ['positive', 'vaderSentiment']
|
return ['positive', 'vaderSentiment']
|
||||||
elif compound < (-1 / 3):
|
elif compound < (-1 / 3):
|
||||||
|
|
|
||||||
|
|
@ -31,18 +31,18 @@ class TootCrawler():
|
||||||
timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
|
timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
|
||||||
for i in allTimelineResults:
|
for i in allTimelineResults:
|
||||||
content = self.cleanhtml(i.content)
|
content = self.cleanhtml(i.content)
|
||||||
sentiment = self.sentiTooter.analyze(i)
|
language = detect(content)
|
||||||
toots.append(
|
sentiment = self.sentiTooter.analyze(language, content)
|
||||||
{
|
toot = {
|
||||||
"sentiment": sentiment[0],
|
"sentiment": sentiment[0],
|
||||||
"model": sentiment[1],
|
"model": sentiment[1],
|
||||||
"userName": i.account.display_name,
|
"toot": content,
|
||||||
"userId": i.account.id,
|
"datetime": i.created_at.astimezone(self.localTimezone),
|
||||||
"toot": content,
|
"language": language,
|
||||||
"datetime": i.created_at.astimezone(self.localTimezone),
|
"userName": i.account.display_name,
|
||||||
"language": detect(content),
|
"userId": i.account.id,
|
||||||
"tootId": i.id
|
"tootId": i.id
|
||||||
}
|
}
|
||||||
)
|
toots.append(toot)
|
||||||
toots.sort(key=lambda item:item.get('datetime'))
|
toots.sort(key=lambda item:item.get('datetime'))
|
||||||
return pd.DataFrame.from_records(toots)
|
return pd.DataFrame.from_records(toots)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue