made hedonodon server ready

This commit is contained in:
Robert Nasarek 2023-01-27 21:08:25 +01:00
parent 52223192b4
commit 2b98565444
10 changed files with 342 additions and 336 deletions

23
.gitignore vendored
View file

@ -1,11 +1,12 @@
database.db database.db
plots plots
instance instance
__pycache__ __pycache__
hedonodon_clientcred.secret hedonodon_clientcred.secret
hedonodon_usercred.secret hedonodon_usercred.secret
.fleet .fleet
test.py test.py
.idea .idea
cardiffnlp cardiffnlp
venv venv
logs.txt

View file

@ -1,48 +1,48 @@
from DbSetup import engine, session, databaseUrl from DbSetup import connection, engine, session, databaseUrl
import pandas as pd import pandas as pd
from sqlalchemy import desc, select from sqlalchemy import desc, select, sql
from Tables import Toots from Tables import Toots
def calculateSentimentCount():
    """Count yesterday's toots per sentiment label.

    Runs an aggregate query on the ``Toots`` table through the shared
    module-level ``connection``.

    Returns:
        A DataFrame with the columns ``date`` (the text produced by SQLite's
        ``DATE()``), ``sentiment`` and ``sentimentCount``; one row per
        sentiment label that occurred in the window. The frame is empty when
        no toot falls into the window.
    """
    # The day window is applied with WHERE, i.e. before grouping. The former
    # HAVING clause compared a bare, non-aggregated column, which only works
    # through an SQLite extension and forces every group to be built first.
    # String literals use single quotes: double quotes denote identifiers in
    # standard SQL.
    # NOTE(review): SQLite evaluates DATE('now') in UTC - confirm that this
    # is the intended day boundary for the stored timestamps.
    query = '''SELECT DATE(datetime) AS date, sentiment, COUNT(sentiment) AS sentimentCount
               FROM Toots
               WHERE datetime >= DATE('now', '-1 day')
                 AND datetime < DATE('now')
               GROUP BY DATE(datetime),
                        sentiment'''
    # No parse_dates argument: the previous value named a "datetime" column
    # that is not part of this result set, so it never had an effect. The
    # "date" column deliberately stays a plain string.
    return pd.read_sql(sql.text(query), connection)
def calculateSentimentMean(dataframe):
    """Condense per-sentiment counts of one day into a single mean score.

    Every negative toot counts as -1 and every positive toot as +1; all
    other labels only enlarge the denominator. The result therefore lies
    between -1 (all negative) and 1 (all positive).

    Args:
        dataframe: Frame with the columns ``date``, ``sentiment`` and
            ``sentimentCount``.

    Returns:
        A DataFrame with the columns ``date`` and ``sentimentsMean``. It has
        one row, carrying the ``date`` of the first input row, or no rows
        when the input is empty or all counts are zero (the mean is
        undefined in that case).
    """
    resultColumns = ['date', 'sentimentsMean']
    if dataframe.empty:
        # Previously this case failed with a KeyError on the row lookup.
        return pd.DataFrame(columns=resultColumns)

    counts = dataframe['sentimentCount']
    labels = dataframe['sentiment']
    sentimentSum = counts.sum()
    if sentimentSum == 0:
        # Avoid a 0 / 0 division that would store NaN as the day's mean.
        return pd.DataFrame(columns=resultColumns)

    negativeSentimentSum = counts[labels == 'negative'].sum() * -1
    positiveSentimentSum = counts[labels == 'positive'].sum()
    sentimentMean = (negativeSentimentSum + positiveSentimentSum) / sentimentSum
    # Positional access: the first row is wanted regardless of how the
    # frame happens to be indexed.
    sentimentDate = dataframe['date'].iloc[0]
    return pd.DataFrame.from_records(
        [
            {
                'date': sentimentDate,
                'sentimentsMean': sentimentMean
            }
        ]
    )
class CRUDManager():
    """Small persistence helper around the shared engine, connection and session."""

    def saveToDatabase(self, dataframe, table:str, useIndex=False):
        """Append ``dataframe`` to ``table`` on a best-effort basis.

        Args:
            dataframe: Frame whose rows are appended.
            table: Name of the target table.
            useIndex: Whether the frame's index is written as a column.

        A failure is reported on stdout and does not abort the caller.
        """
        try:
            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
        except Exception as error:
            # Only ordinary errors are swallowed (a bare ``except`` also hid
            # KeyboardInterrupt/SystemExit), and the cause is reported so a
            # failed insert can be diagnosed from the output.
            print(f'Could not save data to {table}! Reason: {error}')

    def loadFromDatabase(self, table:str, indexColumn=None):
        """Return the whole ``table`` as a DataFrame, optionally indexed by ``indexColumn``."""
        return pd.read_sql_table(table, connection, index_col=indexColumn)

    def getLastToot(self):
        """Return the ``tootId`` of the row with the latest ``datetime``, or None if there is none."""
        # LIMIT 1 lets the database return just the single row that
        # ``first()`` consumes.
        stmt = select(Toots.tootId).order_by(desc('datetime')).limit(1)
        return session.scalars(stmt).first()

View file

@ -1,11 +1,12 @@
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.ext.declarative import declarative_base
# SQLite database file; the relative URL resolves against the process's
# working directory.
databaseUrl = 'sqlite:///database.db'

# Objects shared by the other modules. They are created when this module is
# imported and are never closed explicitly here.
engine = create_engine(databaseUrl, future=True)
connection = engine.connect()
session = Session(engine)

# Declarative base class that the table classes derive from.
Base = declarative_base()


def init_db():
    """Create the tables registered on ``Base.metadata`` in the database behind ``engine``."""
    Base.metadata.create_all(bind=engine)

196
Main.py
View file

@ -1,98 +1,98 @@
from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
from datetime import datetime, date from datetime import datetime, date
from DbSetup import init_db from DbSetup import init_db
import locale import locale
from MastodonAccountManager import MastodonAccountManager from MastodonAccountManager import MastodonAccountManager
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.dates as mdates import matplotlib.dates as mdates
from TootCrawler import TootCrawler from TootCrawler import TootCrawler
import os

# Daily job: fetch new toots, store them, aggregate yesterday's sentiments,
# render a chart and post it.

# Time formatting (e.g. month names on the chart axis) uses an English locale.
locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
init_db()

mastodonAccountManager = MastodonAccountManager()
mastodonInstance = mastodonAccountManager.instance
# One-time bootstrap, intentionally not executed on regular runs: logging in
# once writes the user credential file that later runs read.
# mastodonInstance.log_in(
#     'USER-EMAIL',
#     'PW',
#     to_file = 'hedonodon_usercred.secret'
# )

tootCrawler = TootCrawler(mastodonInstance)
crudManager = CRUDManager()

# Only toots newer than the last stored one are fetched.
lastTootId = crudManager.getLastToot()
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
if not tootsDataframe.empty:
    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
else:
    print('Nothing changed since last database insert!')

sentimentsYesterday = calculateSentimentCount()
if sentimentsYesterday.empty:
    # Without any toot from yesterday there is nothing to average, plot or
    # post; everything below would fail on the first row lookup.
    raise SystemExit('No toots found for yesterday, nothing to plot or post.')
sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)

if not tootsDataframe.empty:
    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
else:
    print('Nothing changed since last database insert!')

colormap = {
    'negative': '#ff9999',
    'neutral': '#ffcc99',
    "positive": '#99ff99'
}
# One colour per pie wedge, in the row order of the counts frame.
todaysColors = [colormap[sentiment] for sentiment in sentimentsYesterday['sentiment'].to_numpy()]

# The aggregated day as a datetime, and formatted for titles and the file name.
reportDay = datetime.strptime(sentimentsYesterday['date'].iloc[0], '%Y-%m-%d')
TodayDate = reportDay.strftime('%d.%m.%Y')

dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1)

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))

# Pie chart.
pieChartlabels = dataframe4PieChart.index.to_numpy()
pieChart = dataframe4PieChart.plot.pie(
    ax=axes[0],
    y='sentimentCount',
    ylabel="",
    labels=dataframe4PieChart['sentimentCount'],
    title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
    colors=todaysColors,
    wedgeprops=dict(linewidth=3, edgecolor='w'),
    startangle=90
)
axes[0].axis('equal')
# A white disc over the centre turns the pie into a ring.
centre_circle = plt.Circle((0, 0), 0.6, fc='white')
axes[0].add_patch(centre_circle)
axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))

# Line chart.
lineChart = dataframe4LineChart.plot.line(
    ax=axes[1],
    title='Mean of all sentiments from max positive (1) to min negative (-1)'
)
axes[1].grid(True)
# Show the calendar year of the reported day instead of a hard-coded 2023,
# so the chart keeps working after the year changes.
axes[1].set_xlim([date(reportDay.year, 1, 1), date(reportDay.year, 12, 31)])
axes[1].set_ylim([-1, 1])
# Major ticks sit on the month boundaries without labels; the month names
# are drawn on invisible minor ticks at mid-month so they appear centred.
axes[1].xaxis.set_major_locator(mdates.MonthLocator())
axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
axes[1].xaxis.set_major_formatter(plt.NullFormatter())
axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
axes[1].tick_params(which='minor', length=0)

# The plots directory is not tracked in git, so it may be missing on a
# fresh checkout.
os.makedirs('./plots', exist_ok=True)
plotFileUrl = f'./plots/{TodayDate}.png'
plt.savefig(plotFileUrl)

media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')

View file

@ -1,5 +1,5 @@
from mastodon import Mastodon from mastodon import Mastodon
class MastodonAccountManager():
    """Creates the Mastodon client and exposes it as ``instance``."""

    def __init__(self):
        # Both values look like credential file names rather than inline
        # secrets - presumably written during app registration and login.
        self.instance = Mastodon(
            client_id = 'hedonodon_clientcred.secret',
            access_token = 'hedonodon_usercred.secret',
        )

View file

@ -1,4 +1,4 @@
# Hedonodon # Hedonodon
I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds. I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
More documentation coming soon! More documentation coming soon!

View file

@ -1,74 +1,74 @@
from germansentiment import SentimentModel from germansentiment import SentimentModel
import numpy as np import numpy as np
from scipy.special import softmax from scipy.special import softmax
from transformers import AutoModelForSequenceClassification from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer from transformers import AutoTokenizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in ``text`` with fixed placeholders.

    The text is split on single spaces. A token that starts with ``@`` and
    is longer than one character becomes ``@user``; a token that starts
    with ``http`` becomes ``http``. All other tokens, including empty ones
    produced by repeated spaces, are kept unchanged.
    """
    def _placeholder(token):
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        return 'http' if token.startswith('http') else token

    return " ".join(_placeholder(token) for token in text.split(" "))
class SentiTooter: class SentiTooter:
"""""" """"""
def __init__(self): def __init__(self):
self.deModel = SentimentModel() self.deModel = SentimentModel()
self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment" self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment"
self.enModel, self.enTokenizer = self.initModel() self.enModel, self.enTokenizer = self.initModel()
# https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt
self.labels = ['negative', 'neutral', 'positive'] self.labels = ['negative', 'neutral', 'positive']
self.sia = SentimentIntensityAnalyzer() self.sia = SentimentIntensityAnalyzer()
def analyze(self, language, content): def analyze(self, language, content):
match language: match language:
case 'de': case 'de':
sentiment = self.deModel.predict_sentiment([content]) sentiment = self.deModel.predict_sentiment([content])
sentiment.append('germanSentiment') sentiment.append('germanSentiment')
return sentiment return sentiment
case 'en': case 'en':
text = preprocess(content) text = preprocess(content)
encoded_input = self.enTokenizer(text, return_tensors='pt') encoded_input = self.enTokenizer(text, return_tensors='pt')
output = self.enModel(**encoded_input) output = self.enModel(**encoded_input)
scores = output[0][0].detach().numpy() scores = output[0][0].detach().numpy()
scores = softmax(scores) scores = softmax(scores)
sentimentIndexWithMaxScore = np.argmax(scores) sentimentIndexWithMaxScore = np.argmax(scores)
sentimentLabel = self.labels[sentimentIndexWithMaxScore] sentimentLabel = self.labels[sentimentIndexWithMaxScore]
sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment'] sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment']
return sentiment return sentiment
case _: case _:
compound = self.sia.polarity_scores(content)['compound'] compound = self.sia.polarity_scores(content)['compound']
if compound > (1 / 3): if compound > (1 / 3):
return ['positive', 'vaderSentiment'] return ['positive', 'vaderSentiment']
elif compound < (-1 / 3): elif compound < (-1 / 3):
return ['negative', 'vaderSentiment'] return ['negative', 'vaderSentiment']
else: else:
return ['neutral', 'vaderSentiment'] return ['neutral', 'vaderSentiment']
def initModel(self): def initModel(self):
# PT # PT
tokenizer = AutoTokenizer.from_pretrained(self.enModelType) tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
tokenizer.save_pretrained(self.enModelType) tokenizer.save_pretrained(self.enModelType)
model = AutoModelForSequenceClassification.from_pretrained(self.enModelType) model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
model.save_pretrained(self.enModelType) model.save_pretrained(self.enModelType)
return model, tokenizer return model, tokenizer
# # TF # # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL) # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL) # model.save_pretrained(MODEL)
# text = "Good night 😊" # text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf') # encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input) # output = model(encoded_input)
# scores = output[0][0].numpy() # scores = output[0][0].numpy()
# scores = softmax(scores) # scores = softmax(scores)

View file

@ -1,32 +1,32 @@
from DbSetup import Base from DbSetup import Base
from sqlalchemy import Column, Date, Integer, Float, String from sqlalchemy import Column, Date, Integer, Float, String
class Toots(Base):
    """Table ``Toots``: one row per crawled and classified toot."""

    __tablename__ = 'Toots'
    __table_args__ = {'extend_existing': True}

    # Row key.
    index = Column(Integer, primary_key=True)
    # Name of the sentiment model that produced the label.
    model = Column(String(30))
    # NOTE(review): declared as Date although the column is named
    # "datetime" - confirm whether the time of day is meant to be kept.
    datetime = Column(Date)
    language = Column(String(3))
    sentiment = Column(String(8))
    tootId = Column(String(255))
    # Text of the toot.
    toot = Column(String(600))
    userName = Column(String(255))
    userId = Column(String(255))
class SentimentCounts(Base):
    """Table ``SentimentCounts``: number of toots per date and sentiment label."""

    __tablename__ = 'SentimentCounts'
    __table_args__ = {'extend_existing': True}

    # ``index`` and ``date`` together form the composite primary key.
    index = Column(Integer, primary_key=True)
    sentimentCount = Column(Integer)
    date = Column(Date, primary_key=True)
    sentiment = Column(String(8))
class SentimentMeans(Base):
    """Table ``SentimentMeans``: one mean sentiment value per date."""

    __tablename__ = 'SentimentMeans'
    __table_args__ = {'extend_existing': True}

    # ``index`` and ``date`` together form the composite primary key.
    index = Column(Integer, primary_key=True)
    date = Column(Date, primary_key=True)
    # NOTE(review): spelled with a capital "S" - verify that writers of
    # this table use the same column spelling.
    SentimentsMean = Column(Float)

View file

@ -1,48 +1,48 @@
from langdetect import detect from langdetect import detect
import pytz import pytz
import pandas as pd import pandas as pd
import re import re
from SentiTooter import SentiTooter from SentiTooter import SentiTooter
from pprint import pprint from pprint import pprint
class TootCrawler():
    """Fetch toots from the local timeline and turn them into a classified DataFrame."""

    def __init__(self, mastodonInstance) -> None:
        """Store the API client and set up the helpers used per toot.

        Args:
            mastodonInstance: Client object providing ``timeline_local`` and
                ``fetch_previous``.
        """
        self.mastodonInstance = mastodonInstance
        # Non-greedy match of anything that looks like an HTML tag.
        self.compilePattern = re.compile('<.*?>')
        self.sentiTooter = SentiTooter()
        self.localTimezone = pytz.timezone('Europe/Berlin')

    def getLocalTimeline(self, minId=None):
        """Return one page of the local timeline, restricted by ``minId``."""
        # NOTE(review): the server may cap the page size well below 500 -
        # confirm against the instance's API limits.
        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)

    def cleanhtml(self, raw_html):
        """Reduce a toot's HTML content to plain text.

        Tags are removed, HTML entities are decoded and every token that
        starts with ``http`` is dropped.
        """
        import html

        cleantext = self.compilePattern.sub('', raw_html)
        # Decode entities such as &#39; or &amp; after the tags are gone, so
        # that escaped angle brackets are not mistaken for markup and the
        # sentiment models see the real characters.
        cleantext = html.unescape(cleantext)
        cleantext = re.sub(r'http\S+', '', cleantext)
        return cleantext

    def buildTootsDataframe(self, minId=None):
        """Collect the toots selected by ``minId`` and classify each one.

        Args:
            minId: Id passed to the first timeline request; ``None`` applies
                no restriction.

        Returns:
            A DataFrame sorted by ``datetime`` with the columns
            ``sentiment``, ``model``, ``toot``, ``datetime``, ``language``,
            ``userName``, ``userId`` and ``tootId``. It is empty when the
            timeline returned nothing.
        """
        from langdetect.lang_detect_exception import LangDetectException

        allTimelineResults = []
        timelinePagination = self.getLocalTimeline(minId)
        while timelinePagination:
            # extend() avoids re-copying the accumulated list for each page.
            allTimelineResults.extend(timelinePagination)
            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)

        toots = []
        for i in allTimelineResults:
            content = self.cleanhtml(i.content)
            try:
                language = detect(content)
            except LangDetectException:
                # Raised for texts without usable features (e.g. a toot that
                # only carried a link or media). 'und' (undetermined) routes
                # the toot to the language-independent fallback model
                # instead of aborting the whole crawl.
                language = 'und'
            sentiment = self.sentiTooter.analyze(language, content)
            toot = {
                "sentiment": sentiment[0],
                "model": sentiment[1],
                "toot": content,
                # Creation time converted to the configured local timezone.
                "datetime": i.created_at.astimezone(self.localTimezone),
                "language": language,
                "userName": i.account.display_name,
                "userId": i.account.id,
                "tootId": i.id
            }
            toots.append(toot)
        toots.sort(key=lambda item:item.get('datetime'))
        return pd.DataFrame.from_records(toots)

View file

@ -1,8 +1,12 @@
mastodon.py mastodon.py
matplotlib matplotlib
pandas pandas
sqlalchemy sqlalchemy
vader-multi vader-multi
numpy langdetect
pytz numpy
transformers pytz
transformers
wheel
germansentiment
scipy