implement word counts.
This commit is contained in:
parent
6a8caac29e
commit
4479bd2429
2 changed files with 47 additions and 18 deletions
41
Main.py
41
Main.py
|
|
@ -6,11 +6,12 @@ from MastodonAccountManager import MastodonAccountManager
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import matplotlib.dates as mdates
|
import matplotlib.dates as mdates
|
||||||
from TootCrawler import TootCrawler
|
from TootCrawler import TootCrawler
|
||||||
from SentiTooter import translateToots, countWords
|
from SentiTooter import translateToots, createWordCountPerSentiment
|
||||||
|
|
||||||
locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
|
locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
|
||||||
init_db()
|
init_db()
|
||||||
|
|
||||||
|
print('Initialize Mastodon...')
|
||||||
mastodonAccountManager = MastodonAccountManager()
|
mastodonAccountManager = MastodonAccountManager()
|
||||||
mastodonInstance = mastodonAccountManager.instance
|
mastodonInstance = mastodonAccountManager.instance
|
||||||
"""
|
"""
|
||||||
|
|
@ -20,34 +21,47 @@ mastodonInstance.log_in(
|
||||||
to_file = 'hedonodon_usercred.secret'
|
to_file = 'hedonodon_usercred.secret'
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
|
print('done!')
|
||||||
|
|
||||||
|
print('Fetching recent toots...')
|
||||||
tootCrawler = TootCrawler(mastodonInstance)
|
tootCrawler = TootCrawler(mastodonInstance)
|
||||||
crudManager = CRUDManager()
|
crudManager = CRUDManager()
|
||||||
|
|
||||||
lastTootId = crudManager.getLastToot()
|
lastTootId = crudManager.getLastToot()
|
||||||
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
|
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
|
||||||
|
print('done!')
|
||||||
|
|
||||||
|
print('Save toots to database...')
|
||||||
if not tootsDataframe.empty:
|
if not tootsDataframe.empty:
|
||||||
crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
|
crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
|
||||||
|
print('done!')
|
||||||
else:
|
else:
|
||||||
print('Nothing changed since last database insert!')
|
print('nothing changed since last database insert!')
|
||||||
|
|
||||||
|
print('Calculate word counts...')
|
||||||
yesterdaysToots = getYesterdaysToots()
|
yesterdaysToots = getYesterdaysToots()
|
||||||
translatedToots = translateToots(yesterdaysToots)
|
translatedToots = translateToots(yesterdaysToots)
|
||||||
tootsSeries = translatedToots.toot
|
wordCountsPerSentiment = createWordCountPerSentiment(translatedToots)
|
||||||
wordCounts = countWords(tootsSeries.str.cat(sep=' '), 10)
|
print('done!')
|
||||||
print(wordCounts);
|
|
||||||
print("exit programm")
|
|
||||||
exit()
|
|
||||||
sentimentsYesterday = calculateSentimentCount()
|
|
||||||
sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
|
|
||||||
|
|
||||||
|
print(wordCountsPerSentiment);
|
||||||
|
|
||||||
|
print('Calculate sentiment counts...')
|
||||||
|
sentimentsYesterday = calculateSentimentCount()
|
||||||
|
print('done!')
|
||||||
|
|
||||||
|
print('Calculate sentiment mean...')
|
||||||
|
sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
|
||||||
|
print('done!')
|
||||||
|
|
||||||
|
print('Save calculations to database...')
|
||||||
if not tootsDataframe.empty:
|
if not tootsDataframe.empty:
|
||||||
crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
|
crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
|
||||||
crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
|
crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
|
||||||
|
print('done!')
|
||||||
else:
|
else:
|
||||||
print('Nothing changed since last database insert!')
|
print('nothing changed since last database insert!')
|
||||||
|
|
||||||
|
print('Create figure...')
|
||||||
colormap = {
|
colormap = {
|
||||||
'negative': '#ff9999',
|
'negative': '#ff9999',
|
||||||
'neutral': '#ffcc99',
|
'neutral': '#ffcc99',
|
||||||
|
|
@ -100,7 +114,8 @@ axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
|
||||||
axes[1].tick_params(which='minor', length=0)
|
axes[1].tick_params(which='minor', length=0)
|
||||||
plotFileUrl = f'./plots/{TodayDate}.png'
|
plotFileUrl = f'./plots/{TodayDate}.png'
|
||||||
plt.savefig(plotFileUrl)
|
plt.savefig(plotFileUrl)
|
||||||
|
print('done!')
|
||||||
|
|
||||||
#media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
|
media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
|
||||||
#mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
|
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.\nWord counts per sentiment:\n{wordCountsPerSentiment}', media_ids=media, language='en')
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -71,20 +71,20 @@ class SentiTooter:
|
||||||
def translateToots(yesterdaysToots):
    """Translate every non-English toot in the frame to English.

    Each row whose ``language`` is not ``'en'`` is passed to ``translateToot``;
    on success its ``toot`` text is replaced by the translation and its
    ``language`` is set to ``'en'``. Rows whose translation raises are removed.

    The frame is modified in place and the same object is returned, matching
    the aliasing behaviour callers already rely on.

    Args:
        yesterdaysToots: DataFrame with at least ``language`` and ``toot``
            columns.

    Returns:
        The same DataFrame, containing only English / successfully
        translated rows.
    """
    yesterdaysTootsTranslated = yesterdaysToots
    failedIndexes = []

    for index, row in yesterdaysTootsTranslated.iterrows():
        if row['language'] == 'en':
            continue
        try:
            translated = translateToot(row['language'], row['toot'])
        except Exception:
            # Best effort: a toot that cannot be translated is discarded.
            # Only remember it here; the frame is not resized while iterating.
            failedIndexes.append(index)
            continue
        yesterdaysTootsTranslated.at[index, 'toot'] = translated
        yesterdaysTootsTranslated.at[index, 'language'] = 'en'

    # DataFrame.drop returns a new frame unless inplace=True; the previous
    # code discarded that result, so failed rows were never actually removed.
    yesterdaysTootsTranslated.drop(index=failedIndexes, inplace=True)
    return yesterdaysTootsTranslated
|
||||||
|
|
||||||
def translateToot(language, toot):
    """Translate a single toot into English.

    The text is first run through ``preprocess`` and the result is handed to
    ``GoogleTranslator`` with ``language`` as the source and ``'en'`` as the
    target.

    Args:
        language: Source language code of the toot.
        toot: Raw toot text.

    Returns:
        Whatever ``GoogleTranslator.translate`` returns for the preprocessed
        text (presumably the English string; confirm against the library).
    """
    cleanedText = preprocess(toot)
    translator = GoogleTranslator(source=language, target='en')
    return translator.translate(cleanedText)
|
||||||
|
|
||||||
def countWords(concatedToots, count):
|
def countWords(concatedToots, count):
|
||||||
nlp = spacy.load('de_core_news_sm')
|
nlp = spacy.load('en_core_web_md')
|
||||||
doc = nlp(concatedToots)
|
doc = nlp(concatedToots)
|
||||||
|
|
||||||
# noun tokens that aren't stop words or punctuation
|
# noun tokens that aren't stop words or punctuation
|
||||||
|
|
@ -96,4 +96,18 @@ def countWords(concatedToots, count):
|
||||||
|
|
||||||
# the `count` most common noun tokens
|
# the `count` most common noun tokens
|
||||||
noun_freq = Counter(nouns)
|
noun_freq = Counter(nouns)
|
||||||
return noun_freq.most_common(count)
|
return noun_freq.most_common(count)
|
||||||
|
|
||||||
|
def createWordCountPerSentiment(translatedToots):
    """Summarise the most frequent words for each sentiment class.

    For 'positive', 'neutral' and 'negative' in turn, the ``toot`` texts of
    the rows with that ``sentiment`` are joined with single spaces and passed
    to ``countWords`` with a limit of 5.

    Args:
        translatedToots: DataFrame with ``sentiment`` and ``toot`` columns.

    Returns:
        A string with one line per sentiment, in the form
        ``"<sentiment>: <word> (<n>), <word> (<n>), ..."``, joined by
        newlines.
    """
    summaryLines = []
    for sentiment in ('positive', 'neutral', 'negative'):
        matchingToots = translatedToots.loc[
            translatedToots['sentiment'] == sentiment, 'toot'
        ]
        topWords = countWords(matchingToots.str.cat(sep=' '), 5)
        formatted = ', '.join(f'{entry[0]} ({entry[1]})' for entry in topWords)
        summaryLines.append(f'{sentiment}: {formatted}')
    return '\n'.join(summaryLines)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue