implement word counts.

This commit is contained in:
rnsrk 2023-03-15 16:02:47 +01:00
parent 6a8caac29e
commit 4479bd2429
2 changed files with 47 additions and 18 deletions

41
Main.py
View file

@ -6,11 +6,12 @@ from MastodonAccountManager import MastodonAccountManager
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.dates as mdates import matplotlib.dates as mdates
from TootCrawler import TootCrawler from TootCrawler import TootCrawler
from SentiTooter import translateToots, countWords from SentiTooter import translateToots, createWordCountPerSentiment
locale.setlocale(locale.LC_TIME, "en_US.UTF-8") locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
init_db() init_db()
print('Initialize Mastodon...')
mastodonAccountManager = MastodonAccountManager() mastodonAccountManager = MastodonAccountManager()
mastodonInstance = mastodonAccountManager.instance mastodonInstance = mastodonAccountManager.instance
""" """
@ -20,34 +21,47 @@ mastodonInstance.log_in(
to_file = 'hedonodon_usercred.secret' to_file = 'hedonodon_usercred.secret'
) )
""" """
print('done!')
print('Fetching recent toots...')
tootCrawler = TootCrawler(mastodonInstance) tootCrawler = TootCrawler(mastodonInstance)
crudManager = CRUDManager() crudManager = CRUDManager()
lastTootId = crudManager.getLastToot() lastTootId = crudManager.getLastToot()
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId) tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
print('done!')
print('Save toots to database...')
if not tootsDataframe.empty: if not tootsDataframe.empty:
crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False) crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
print('done!')
else: else:
print('Nothing changed since last database insert!') print('nothing changed since last database insert!')
print('Calculate word counts...')
yesterdaysToots = getYesterdaysToots() yesterdaysToots = getYesterdaysToots()
translatedToots = translateToots(yesterdaysToots) translatedToots = translateToots(yesterdaysToots)
tootsSeries = translatedToots.toot wordCountsPerSentiment = createWordCountPerSentiment(translatedToots)
wordCounts = countWords(tootsSeries.str.cat(sep=' '), 10) print('done!')
print(wordCounts);
print("exit programm")
exit()
sentimentsYesterday = calculateSentimentCount()
sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
print(wordCountsPerSentiment);
print('Calculate sentiment counts...')
sentimentsYesterday = calculateSentimentCount()
print('done!')
print('Calculate sentiment mean...')
sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
print('done!')
print('Save calculations to database...')
if not tootsDataframe.empty: if not tootsDataframe.empty:
crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True) crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True) crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
print('done!')
else: else:
print('Nothing changed since last database insert!') print('nothing changed since last database insert!')
print('Create figure...')
colormap = { colormap = {
'negative': '#ff9999', 'negative': '#ff9999',
'neutral': '#ffcc99', 'neutral': '#ffcc99',
@ -100,7 +114,8 @@ axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
axes[1].tick_params(which='minor', length=0) axes[1].tick_params(which='minor', length=0)
plotFileUrl = f'./plots/{TodayDate}.png' plotFileUrl = f'./plots/{TodayDate}.png'
plt.savefig(plotFileUrl) plt.savefig(plotFileUrl)
print('done!')
#media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.") media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
#mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en') mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.\nWord counts per sentiment:\n{wordCountsPerSentiment}', media_ids=media, language='en')

View file

@ -71,20 +71,20 @@ class SentiTooter:
def translateToots(yesterdaysToots):
    """Return a copy of the toots with all non-English toots translated to English.

    Args:
        yesterdaysToots: DataFrame with at least the columns ``language``
            and ``toot``.

    Returns:
        A new DataFrame. Rows whose ``language`` is already ``'en'`` are kept
        unchanged; rows that were translated have their ``toot`` replaced by
        the translation and ``language`` set to ``'en'``; rows whose
        translation raised an error are removed. The input frame is left
        untouched.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    translated = yesterdaysToots.copy()
    failedIndices = []
    for index, row in yesterdaysToots.iterrows():
        if row['language'] == 'en':
            continue
        try:
            translated.at[index, 'toot'] = translateToot(row['language'], row['toot'])
        except Exception:
            # Best effort: a toot that cannot be translated is left out of
            # the result. Collected here and dropped after the loop, because
            # DataFrame.drop returns a new frame instead of changing this one.
            failedIndices.append(index)
        else:
            translated.at[index, 'language'] = 'en'
    return translated.drop(index=failedIndices)
def translateToot(language, toot):
    """Translate a single toot into English.

    The toot text is first run through ``preprocess``; the result is passed
    to a ``GoogleTranslator`` configured with ``language`` as the source and
    ``'en'`` as the target.

    Args:
        language: Source language code of the toot.
        toot: Raw toot text.

    Returns:
        Whatever ``GoogleTranslator.translate`` returns for the preprocessed
        text.
    """
    cleanedText = preprocess(toot)
    translator = GoogleTranslator(source=language, target='en')
    return translator.translate(cleanedText)
def countWords(concatedToots, count): def countWords(concatedToots, count):
nlp = spacy.load('de_core_news_sm') nlp = spacy.load('en_core_web_md')
doc = nlp(concatedToots) doc = nlp(concatedToots)
# noun tokens that aren't stop words or punctuation # noun tokens that aren't stop words or punctuation
@ -96,4 +96,18 @@ def countWords(concatedToots, count):
# the `count` most common noun tokens # the `count` most common noun tokens
noun_freq = Counter(nouns) noun_freq = Counter(nouns)
return noun_freq.most_common(count) return noun_freq.most_common(count)
def createWordCountPerSentiment(translatedToots, count=5):
    """Build a text summary of the most frequent words for each sentiment.

    For each of the sentiments ``positive``, ``neutral`` and ``negative`` the
    matching toots are joined into one string (separated by spaces) and
    passed to ``countWords``. Each sentiment yields one line of the form
    ``"<sentiment>: <word> (<n>), <word> (<n>), ..."``.

    Args:
        translatedToots: DataFrame with at least the columns ``sentiment``
            and ``toot``.
        count: How many of the most common words to list per sentiment.
            Defaults to 5, the value that was previously hard-coded.

    Returns:
        The three summary lines joined by newlines, in the order positive,
        neutral, negative.
    """
    summaryLines = []
    for sentiment in ('positive', 'neutral', 'negative'):
        toots = translatedToots.loc[translatedToots['sentiment'] == sentiment, 'toot']
        wordCounts = countWords(toots.str.cat(sep=' '), count)
        formatted = ', '.join(f'{word} ({frequency})' for word, frequency in wordCounts)
        summaryLines.append(f'{sentiment}: {formatted}')
    return '\n'.join(summaryLines)