implement word counts.

2023-03-15 16:02:47 +01:00 · 2023-03-15 16:02:47 +01:00 · 4479bd2429
commit 4479bd2429
parent 6a8caac29e
2 changed files with 47 additions and 18 deletions
--- a/Main.py
+++ b/Main.py
@ -6,11 +6,12 @@ from MastodonAccountManager import MastodonAccountManager
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 from TootCrawler import TootCrawler
-from SentiTooter import translateToots, countWords
+from SentiTooter import translateToots, createWordCountPerSentiment
 locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
 init_db()
 print('Initialize Mastodon...')
 mastodonAccountManager = MastodonAccountManager()
 mastodonInstance = mastodonAccountManager.instance
 """
@ -20,34 +21,47 @@ mastodonInstance.log_in(
    to_file = 'hedonodon_usercred.secret'
 )
 """
 print('done!')
 print('Fetching recent toots...')
 tootCrawler = TootCrawler(mastodonInstance)
 crudManager = CRUDManager()
 lastTootId = crudManager.getLastToot()
 tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
 print('done!')
 print('Save toots to database...')
 if not tootsDataframe.empty:
    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
    print('done!')
 else:
-    print('Nothing changed since last database insert!')
+    print('nothing changed since last database insert!')
 print('Calculate word counts...')
 yesterdaysToots = getYesterdaysToots()
 translatedToots = translateToots(yesterdaysToots)
-tootsSeries = translatedToots.toot
+wordCountsPerSentiment = createWordCountPerSentiment(translatedToots)
-wordCounts = countWords(tootsSeries.str.cat(sep=' '), 10)
+print('done!')
 print(wordCounts);
 print("exit programm")
 exit()
 sentimentsYesterday = calculateSentimentCount()
 sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
 print(wordCountsPerSentiment);
 print('Calculate sentiment counts...')
 sentimentsYesterday = calculateSentimentCount()
 print('done!')
 print('Calculate sentiment mean...')
 sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
 print('done!')
 print('Save calculations to database...')
 if not tootsDataframe.empty:
    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
    print('done!')
 else:
-    print('Nothing changed since last database insert!')
+    print('nothing changed since last database insert!')
 print('Create figure...')
 colormap = {
    'negative': '#ff9999',
    'neutral': '#ffcc99',
@ -100,7 +114,8 @@ axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
 axes[1].tick_params(which='minor', length=0)
 plotFileUrl = f'./plots/{TodayDate}.png'
 plt.savefig(plotFileUrl)
 print('done!')
-#media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
+media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
-#mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
+mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.\nWord counts per sentiment:\n{wordCountsPerSentiment}', media_ids=media, language='en')
--- a/SentiTooter.py
+++ b/SentiTooter.py
@ -71,20 +71,20 @@ class SentiTooter:
 def translateToots(yesterdaysToots):
    yesterdaysTootsTranslated = yesterdaysToots
    for index, row in yesterdaysTootsTranslated.iterrows():
-        if (row['language'] != 'de'):
+        if (row['language'] != 'en'):
            try:
                yesterdaysTootsTranslated.at[index,'toot'] = translateToot(row['language'], row['toot'])
-                yesterdaysTootsTranslated.at[index,'language'] = 'de'
+                yesterdaysTootsTranslated.at[index,'language'] = 'en'
            except:
                yesterdaysTootsTranslated.drop(index)
    return yesterdaysTootsTranslated
 def translateToot(language, toot):
    content = preprocess(toot)
-    return GoogleTranslator(source=language, target='de').translate(content)
+    return GoogleTranslator(source=language, target='en').translate(content)
 def countWords(concatedToots, count):
-    nlp = spacy.load('de_core_news_sm')
+    nlp = spacy.load('en_core_web_md')
    doc = nlp(concatedToots)
    # noun tokens that arent stop words or punctuations
@ -96,4 +96,18 @@ def countWords(concatedToots, count):
    # five most common noun tokens
    noun_freq = Counter(nouns)
-    return noun_freq.most_common(count)
+    return noun_freq.most_common(count)
 def createWordCountPerSentiment(translatedToots):
     """Build a multi-line text summary of word counts, one line per sentiment.
     The sentiments 'positive', 'neutral' and 'negative' are handled in that
     order. For each one, the 'toot' values of the rows whose 'sentiment'
     column equals it are joined with single spaces and passed to countWords,
     asking for 5 entries. Every returned (word, count) pair is rendered as
     'word (count)'; the pairs are comma-separated behind a
     '<sentiment>: ' prefix and the three lines are joined with newlines.
     translatedToots is indexed by a boolean mask and has 'sentiment' and
     'toot' columns (presumably a pandas DataFrame - confirm against caller).
     Returns the summary as a single string.
     """
     summaryLines = []
     for label in ('positive', 'neutral', 'negative'):
         # Keep only the rows tagged with the current sentiment.
         matchingRows = translatedToots[translatedToots['sentiment'] == label]
         joinedText = matchingRows['toot'].str.cat(sep=' ')
         # countWords yields (word, count) pairs; !s mirrors the explicit str().
         entries = [f'{word!s} ({freq!s})' for word, freq in countWords(joinedText, 5)]
         summaryLines.append(f"{label}: {', '.join(entries)}")
     return '\n'.join(summaryLines)