made hedonodon server ready

This commit is contained in:
Robert Nasarek 2023-01-27 21:08:25 +01:00
parent 52223192b4
commit 2b98565444
10 changed files with 342 additions and 336 deletions

23
.gitignore vendored
View file

@ -1,11 +1,12 @@
database.db
plots
instance
__pycache__
hedonodon_clientcred.secret
hedonodon_usercred.secret
.fleet
test.py
.idea
cardiffnlp
venv
database.db
plots
instance
__pycache__
hedonodon_clientcred.secret
hedonodon_usercred.secret
.fleet
test.py
.idea
cardiffnlp
venv
logs.txt

View file

@ -1,48 +1,48 @@
from DbSetup import engine, session, databaseUrl
import pandas as pd
from sqlalchemy import desc, select
from Tables import Toots
def calculateSentimentCount():
    """Count yesterday's toots per sentiment label.

    Returns:
        A DataFrame with one row per (date, sentiment) group among the toots
        dated yesterday, with the columns ``date``, ``sentiment`` and
        ``sentimentCount``.
    """
    # The date range is filtered with WHERE *before* grouping. The former
    # HAVING clause tested the bare, non-aggregated ``datetime`` column, which
    # standard SQL rejects and SQLite evaluates against an arbitrary row of
    # each group. 'now' is single-quoted because double quotes denote
    # identifiers in SQL.
    query = '''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
        FROM Toots
        WHERE datetime >= DATE('now','-1 day')
        AND datetime < DATE('now')
        GROUP BY DATE(datetime),
        sentiment'''
    # NOTE(review): the result set has no 'datetime' column, so parse_dates
    # looks like a no-op here; it is kept unchanged because Main.py parses
    # the 'date' column itself with strptime.
    return pd.read_sql(
        query,
        databaseUrl,
        parse_dates=["datetime"]
    )
def calculateSentimentMean(dataframe):
    """Collapse per-sentiment counts into a single mean sentiment score.

    Negative toots weigh -1, positive toots +1 and every other label 0, so
    the mean lies between -1 and 1.

    Args:
        dataframe: Frame with the columns ``date``, ``sentiment`` and
            ``sentimentCount``.

    Returns:
        A one-row DataFrame with the columns ``date`` (taken from the first
        input row) and ``sentimentsMean``. An empty frame with those two
        columns is returned when ``dataframe`` has no rows.
    """
    if dataframe.empty:
        # Nothing to average; previously this raised KeyError on ``loc[0]``.
        return pd.DataFrame(columns=['date', 'sentimentsMean'])

    counts = dataframe['sentimentCount']
    labels = dataframe['sentiment']
    negativeSentimentSum = counts[labels == 'negative'].sum()
    positiveSentimentSum = counts[labels == 'positive'].sum()
    sentimentSum = counts.sum()
    if sentimentSum:
        sentimentMean = (positiveSentimentSum - negativeSentimentSum) / sentimentSum
    else:
        # All counts are zero: keep the NaN result without a division warning.
        sentimentMean = float('nan')
    # Positional access: the frame's index need not start at label 0.
    sentimentDate = dataframe['date'].iloc[0]
    return pd.DataFrame.from_records(
        [
            {
                'date': sentimentDate,
                'sentimentsMean': sentimentMean
            }
        ]
    )
class CRUDManager():
    """Small helper around the pandas/SQLAlchemy calls that touch the database."""

    def saveToDatabase(self, dataframe, table: str, useIndex=False):
        """Append ``dataframe`` to ``table`` on a best-effort basis.

        Args:
            dataframe: Frame whose rows are appended.
            table: Name of the target table.
            useIndex: Whether the frame's index is written as a column.

        A failing insert is reported on stdout and otherwise ignored, so the
        caller continues either way.
        """
        try:
            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
        except Exception as error:
            # ``Exception`` instead of a bare ``except`` so Ctrl-C and
            # SystemExit still propagate; the cause is printed for diagnosis.
            print(f'Could not save data to {table}! Reason: {error!r}')

    def loadFromDatabase(self, table: str, indexColumn=None):
        """Return the whole ``table`` as a DataFrame, optionally indexed by ``indexColumn``."""
        return pd.read_sql_table(table, databaseUrl, index_col=indexColumn)

    def getLastToot(self):
        """Return the ``tootId`` of the toot with the latest ``datetime``, or ``None`` if no toot is stored."""
        stmt = select(Toots.tootId).order_by(desc('datetime'))
        return session.scalars(stmt).first()
from DbSetup import connection, engine, session, databaseUrl
import pandas as pd
from sqlalchemy import desc, select, sql
from Tables import Toots
def calculateSentimentCount():
    """Count yesterday's toots per sentiment label.

    Returns:
        A DataFrame with one row per (date, sentiment) group among the toots
        dated yesterday, with the columns ``date``, ``sentiment`` and
        ``sentimentCount``.
    """
    # The date range is filtered with WHERE *before* grouping. The former
    # HAVING clause tested the bare, non-aggregated ``datetime`` column, which
    # standard SQL rejects and SQLite evaluates against an arbitrary row of
    # each group. 'now' is single-quoted because double quotes denote
    # identifiers in SQL.
    query = '''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
        FROM Toots
        WHERE datetime >= DATE('now','-1 day')
        AND datetime < DATE('now')
        GROUP BY DATE(datetime),
        sentiment'''
    # NOTE(review): the result set has no 'datetime' column, so parse_dates
    # looks like a no-op here; it is kept unchanged because Main.py parses
    # the 'date' column itself with strptime.
    return pd.read_sql(
        sql.text(query),
        connection,
        parse_dates=["datetime"]
    )
def calculateSentimentMean(dataframe):
    """Collapse per-sentiment counts into a single mean sentiment score.

    Negative toots weigh -1, positive toots +1 and every other label 0, so
    the mean lies between -1 and 1.

    Args:
        dataframe: Frame with the columns ``date``, ``sentiment`` and
            ``sentimentCount``.

    Returns:
        A one-row DataFrame with the columns ``date`` (taken from the first
        input row) and ``sentimentsMean``. An empty frame with those two
        columns is returned when ``dataframe`` has no rows.
    """
    if dataframe.empty:
        # Nothing to average; previously this raised KeyError on ``loc[0]``.
        return pd.DataFrame(columns=['date', 'sentimentsMean'])

    counts = dataframe['sentimentCount']
    labels = dataframe['sentiment']
    negativeSentimentSum = counts[labels == 'negative'].sum()
    positiveSentimentSum = counts[labels == 'positive'].sum()
    sentimentSum = counts.sum()
    if sentimentSum:
        sentimentMean = (positiveSentimentSum - negativeSentimentSum) / sentimentSum
    else:
        # All counts are zero: keep the NaN result without a division warning.
        sentimentMean = float('nan')
    # Positional access: the frame's index need not start at label 0.
    sentimentDate = dataframe['date'].iloc[0]
    return pd.DataFrame.from_records(
        [
            {
                'date': sentimentDate,
                'sentimentsMean': sentimentMean
            }
        ]
    )
class CRUDManager():
    """Small helper around the pandas/SQLAlchemy calls that touch the database."""

    def saveToDatabase(self, dataframe, table: str, useIndex=False):
        """Append ``dataframe`` to ``table`` on a best-effort basis.

        Args:
            dataframe: Frame whose rows are appended.
            table: Name of the target table.
            useIndex: Whether the frame's index is written as a column.

        A failing insert is reported on stdout and otherwise ignored, so the
        caller continues either way.
        """
        try:
            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
        except Exception as error:
            # ``Exception`` instead of a bare ``except`` so Ctrl-C and
            # SystemExit still propagate; the cause is printed for diagnosis.
            print(f'Could not save data to {table}! Reason: {error!r}')

    def loadFromDatabase(self, table: str, indexColumn=None):
        """Return the whole ``table`` as a DataFrame, optionally indexed by ``indexColumn``."""
        return pd.read_sql_table(table, connection, index_col=indexColumn)

    def getLastToot(self):
        """Return the ``tootId`` of the toot with the latest ``datetime``, or ``None`` if no toot is stored."""
        stmt = select(Toots.tootId).order_by(desc('datetime'))
        return session.scalars(stmt).first()

View file

@ -1,11 +1,12 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy.ext.declarative import declarative_base
databaseUrl = 'sqlite:///database.db'
engine = create_engine(databaseUrl, future=True)
session = Session(engine)
Base = declarative_base()
def init_db():
    """Create the tables of every model registered on ``Base`` using the module's engine."""
    metadata = Base.metadata
    metadata.create_all(engine)
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy.ext.declarative import declarative_base
databaseUrl = 'sqlite:///database.db'
engine = create_engine(databaseUrl, future=True)
connection = engine.connect()
session = Session(engine)
Base = declarative_base()
def init_db():
    """Create the tables of every model registered on ``Base`` using the module's engine."""
    metadata = Base.metadata
    metadata.create_all(engine)

196
Main.py
View file

@ -1,98 +1,98 @@
from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
from datetime import datetime, date
from DbSetup import init_db
import locale
from MastodonAccountManager import MastodonAccountManager
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from TootCrawler import TootCrawler
# Daily job: crawl new toots, store them, aggregate yesterday's sentiments,
# render a pie + line chart and post the picture to Mastodon.

# "en_EN" is not an existing locale name; the English locale is "en_US".
locale.setlocale(locale.LC_TIME, "en_US.UTF-8")

init_db()

mastodonAccountManager = MastodonAccountManager()
mastodonInstance = mastodonAccountManager.instance

# Login call kept for reference; it writes the user credentials to
# 'hedonodon_usercred.secret':
# mastodonInstance.log_in(
#     'USER-EMAIL',
#     'PW',
#     to_file = 'hedonodon_usercred.secret'
# )

tootCrawler = TootCrawler(mastodonInstance)
crudManager = CRUDManager()

# Only toots newer than the most recent stored one are crawled.
lastTootId = crudManager.getLastToot()
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
hasNewToots = not tootsDataframe.empty

if hasNewToots:
    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
else:
    print('Nothing changed since last database insert!')

# The new toots must be stored before this query, which reads the Toots table.
sentimentsYesterday = calculateSentimentCount()
if sentimentsYesterday.empty:
    # Without any toot from yesterday there is no date to label the plot
    # with; stop cleanly instead of failing with a KeyError further down.
    print('No toots found for yesterday, nothing to plot or post!')
    raise SystemExit(0)

sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)

if hasNewToots:
    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)

colormap = {
    'negative': '#ff9999',
    'neutral': '#ffcc99',
    "positive": '#99ff99'
}

# One colour per row, in the row order of the pie chart's wedges.
todaysColors = [colormap[sentiment] for sentiment in sentimentsYesterday['sentiment'].to_numpy()]

reportDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d')
TodayDate = reportDate.strftime('%d.%m.%Y')

dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1)

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))

# Pie chart.
pieChartlabels = dataframe4PieChart.index.to_numpy()
pieChart = dataframe4PieChart.plot.pie(
    ax=axes[0],
    y='sentimentCount',
    ylabel="",
    labels=dataframe4PieChart['sentimentCount'],
    title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
    colors=todaysColors,
    wedgeprops=dict(linewidth=3, edgecolor='w'),
    startangle=90
)
axes[0].axis('equal')
# A white disc over the centre turns the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.6, fc='white')
axes[0].add_patch(centre_circle)
axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))

# Line chart.
lineChart = dataframe4LineChart.plot.line(
    ax=axes[1],
    title='Mean of all sentiments from max positive (1) to min negative (-1)'
)
axes[1].grid(True)
# Show the calendar year of the report instead of a hard-coded 2023.
axes[1].set_xlim([date(reportDate.year, 1, 1), date(reportDate.year, 12, 31)])
axes[1].set_ylim([-1, 1])
# Major ticks mark month starts without labels; the month names are drawn
# on the minor ticks placed mid-month.
axes[1].xaxis.set_major_locator(mdates.MonthLocator())
axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
axes[1].xaxis.set_major_formatter(plt.NullFormatter())
axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
axes[1].tick_params(which='minor', length=0)

plotFileUrl = f'./plots/{TodayDate}.png'
plt.savefig(plotFileUrl)

media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
from datetime import datetime, date
from DbSetup import init_db
import locale
from MastodonAccountManager import MastodonAccountManager
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from TootCrawler import TootCrawler
# Daily job: crawl new toots, store them, aggregate yesterday's sentiments,
# render a pie + line chart and post the picture to Mastodon.

locale.setlocale(locale.LC_TIME, "en_US.UTF-8")

init_db()

mastodonAccountManager = MastodonAccountManager()
mastodonInstance = mastodonAccountManager.instance

# Login call kept for reference; it writes the user credentials to
# 'hedonodon_usercred.secret':
# mastodonInstance.log_in(
#     'USER-EMAIL',
#     'PW',
#     to_file = 'hedonodon_usercred.secret'
# )

tootCrawler = TootCrawler(mastodonInstance)
crudManager = CRUDManager()

# Only toots newer than the most recent stored one are crawled.
lastTootId = crudManager.getLastToot()
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
hasNewToots = not tootsDataframe.empty

if hasNewToots:
    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
else:
    print('Nothing changed since last database insert!')

# The new toots must be stored before this query, which reads the Toots table.
sentimentsYesterday = calculateSentimentCount()
if sentimentsYesterday.empty:
    # Without any toot from yesterday there is no date to label the plot
    # with; stop cleanly instead of failing with a KeyError further down.
    print('No toots found for yesterday, nothing to plot or post!')
    raise SystemExit(0)

sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)

if hasNewToots:
    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)

colormap = {
    'negative': '#ff9999',
    'neutral': '#ffcc99',
    "positive": '#99ff99'
}

# One colour per row, in the row order of the pie chart's wedges.
todaysColors = [colormap[sentiment] for sentiment in sentimentsYesterday['sentiment'].to_numpy()]

reportDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d')
TodayDate = reportDate.strftime('%d.%m.%Y')

dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1)

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))

# Pie chart.
pieChartlabels = dataframe4PieChart.index.to_numpy()
pieChart = dataframe4PieChart.plot.pie(
    ax=axes[0],
    y='sentimentCount',
    ylabel="",
    labels=dataframe4PieChart['sentimentCount'],
    title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
    colors=todaysColors,
    wedgeprops=dict(linewidth=3, edgecolor='w'),
    startangle=90
)
axes[0].axis('equal')
# A white disc over the centre turns the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.6, fc='white')
axes[0].add_patch(centre_circle)
axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))

# Line chart.
lineChart = dataframe4LineChart.plot.line(
    ax=axes[1],
    title='Mean of all sentiments from max positive (1) to min negative (-1)'
)
axes[1].grid(True)
# Show the calendar year of the report instead of a hard-coded 2023.
axes[1].set_xlim([date(reportDate.year, 1, 1), date(reportDate.year, 12, 31)])
axes[1].set_ylim([-1, 1])
# Major ticks mark month starts without labels; the month names are drawn
# on the minor ticks placed mid-month.
axes[1].xaxis.set_major_locator(mdates.MonthLocator())
axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
axes[1].xaxis.set_major_formatter(plt.NullFormatter())
axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
axes[1].tick_params(which='minor', length=0)

plotFileUrl = f'./plots/{TodayDate}.png'
plt.savefig(plotFileUrl)

media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')

View file

@ -1,5 +1,5 @@
from mastodon import Mastodon
class MastodonAccountManager():
    """Holds a ``Mastodon`` client built from the two local credential files."""

    def __init__(self):
        # Both values are file names handed to the Mastodon constructor.
        credentialFiles = {
            'client_id': 'hedonodon_clientcred.secret',
            'access_token': 'hedonodon_usercred.secret',
        }
        self.instance = Mastodon(**credentialFiles)
from mastodon import Mastodon
class MastodonAccountManager():
    """Holds a ``Mastodon`` client built from the two local credential files."""

    def __init__(self):
        # Both values are file names handed to the Mastodon constructor.
        credentialFiles = {
            'client_id': 'hedonodon_clientcred.secret',
            'access_token': 'hedonodon_usercred.secret',
        }
        self.instance = Mastodon(**credentialFiles)

View file

@ -1,4 +1,4 @@
# Hedonodon
I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
# Hedonodon
I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compound sentiment scores.
More documentation coming soon!

View file

@ -1,74 +1,74 @@
from germansentiment import SentimentModel
import numpy as np
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in ``text`` with fixed placeholders.

    The text is split on single spaces. A token starting with ``@`` that is
    longer than one character becomes ``@user``; a token starting with
    ``http`` becomes ``http``. The tokens are joined with single spaces again.
    """
    def placeholder(token):
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        return token

    return " ".join(placeholder(token) for token in text.split(" "))
class SentiTooter:
    """Labels a text as negative, neutral or positive with a model chosen by language."""

    def __init__(self):
        self.deModel = SentimentModel()
        self.enModelType = "cardiffnlp/twitter-roberta-base-sentiment"
        self.enModel, self.enTokenizer = self.initModel()
        # Label order of the English model's output scores, see
        # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt
        self.labels = ['negative', 'neutral', 'positive']
        self.sia = SentimentIntensityAnalyzer()

    def analyze(self, language, content):
        """Return ``[label, modelName]`` for ``content``.

        ``'de'`` is handled by the German model, ``'en'`` by the English
        transformer model, and every other language by VADER, whose compound
        score is cut at +1/3 and -1/3.
        """
        if language == 'de':
            # presumably one label per input text; the model name is appended to that list
            germanResult = self.deModel.predict_sentiment([content])
            germanResult.append('germanSentiment')
            return germanResult

        if language == 'en':
            modelInput = self.enTokenizer(preprocess(content), return_tensors='pt')
            rawScores = self.enModel(**modelInput)[0][0].detach().numpy()
            probabilities = softmax(rawScores)
            bestLabel = self.labels[np.argmax(probabilities)]
            return [bestLabel, 'twitter-roberta-base-sentiment']

        compound = self.sia.polarity_scores(content)['compound']
        threshold = 1 / 3
        if compound > threshold:
            return ['positive', 'vaderSentiment']
        if compound < -threshold:
            return ['negative', 'vaderSentiment']
        return ['neutral', 'vaderSentiment']

    def initModel(self):
        """Load tokenizer and model named by ``enModelType`` and save both under that same path.

        Returns:
            The tuple ``(model, tokenizer)``.
        """
        # PyTorch variant.
        modelName = self.enModelType
        tokenizer = AutoTokenizer.from_pretrained(modelName)
        tokenizer.save_pretrained(modelName)
        model = AutoModelForSequenceClassification.from_pretrained(modelName)
        model.save_pretrained(modelName)
        return model, tokenizer
# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)
# text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)
from germansentiment import SentimentModel
import numpy as np
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Preprocess text (username and link placeholders)
def preprocess(text):
    """Replace user mentions and links in ``text`` with fixed placeholders.

    The text is split on single spaces. A token starting with ``@`` that is
    longer than one character becomes ``@user``; a token starting with
    ``http`` becomes ``http``. The tokens are joined with single spaces again.
    """
    def placeholder(token):
        if token.startswith('@') and len(token) > 1:
            token = '@user'
        if token.startswith('http'):
            token = 'http'
        return token

    return " ".join(placeholder(token) for token in text.split(" "))
class SentiTooter:
    """Labels a text as negative, neutral or positive with a model chosen by language."""

    def __init__(self):
        self.deModel = SentimentModel()
        self.enModelType = "cardiffnlp/twitter-roberta-base-sentiment"
        self.enModel, self.enTokenizer = self.initModel()
        # Label order of the English model's output scores, see
        # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt
        self.labels = ['negative', 'neutral', 'positive']
        self.sia = SentimentIntensityAnalyzer()

    def analyze(self, language, content):
        """Return ``[label, modelName]`` for ``content``.

        ``'de'`` is handled by the German model, ``'en'`` by the English
        transformer model, and every other language by VADER, whose compound
        score is cut at +1/3 and -1/3.
        """
        if language == 'de':
            # presumably one label per input text; the model name is appended to that list
            germanResult = self.deModel.predict_sentiment([content])
            germanResult.append('germanSentiment')
            return germanResult

        if language == 'en':
            modelInput = self.enTokenizer(preprocess(content), return_tensors='pt')
            rawScores = self.enModel(**modelInput)[0][0].detach().numpy()
            probabilities = softmax(rawScores)
            bestLabel = self.labels[np.argmax(probabilities)]
            return [bestLabel, 'twitter-roberta-base-sentiment']

        compound = self.sia.polarity_scores(content)['compound']
        threshold = 1 / 3
        if compound > threshold:
            return ['positive', 'vaderSentiment']
        if compound < -threshold:
            return ['negative', 'vaderSentiment']
        return ['neutral', 'vaderSentiment']

    def initModel(self):
        """Load tokenizer and model named by ``enModelType`` and save both under that same path.

        Returns:
            The tuple ``(model, tokenizer)``.
        """
        # PyTorch variant.
        modelName = self.enModelType
        tokenizer = AutoTokenizer.from_pretrained(modelName)
        tokenizer.save_pretrained(modelName)
        model = AutoModelForSequenceClassification.from_pretrained(modelName)
        model.save_pretrained(modelName)
        return model, tokenizer
# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)
# text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

View file

@ -1,32 +1,32 @@
from DbSetup import Base
from sqlalchemy import Column, Date, Integer, Float, String
# ORM mapping of the 'Toots' table: one row per crawled toot, holding its
# text, author, detected language, sentiment label and the model that
# produced the label.
class Toots(Base):
__tablename__ = 'Toots'
# 'extend_existing' lets this declaration extend a 'Toots' table that is already registered in the metadata.
__table_args__ = {'extend_existing': True}
index = Column(Integer, primary_key=True)
model = Column(String(30))
# NOTE(review): declared as Date although TootCrawler supplies full timestamps — confirm the time part is not needed.
datetime = Column(Date)
language = Column(String(3))
sentiment = Column(String(8))
tootId = Column(String(255))
toot = Column(String(600))
userName = Column(String(255))
userId = Column(String(255))
# ORM mapping of the 'SentimentCounts' table: number of toots per date and
# sentiment label (Main.py appends the counts of the previous day).
class SentimentCounts(Base):
__tablename__ = 'SentimentCounts'
__table_args__ = {'extend_existing': True}
# 'index' and 'date' are both flagged primary_key, so they form a composite key.
index = Column(Integer, primary_key=True)
sentimentCount = Column(Integer)
date = Column(Date, primary_key=True)
sentiment = Column(String(8))
# ORM mapping of the 'SentimentMeans' table, keyed by 'index' and 'date'.
class SentimentMeans(Base):
__tablename__ = 'SentimentMeans'
__table_args__ = {'extend_existing': True}
# 'index' and 'date' are both flagged primary_key, so they form a composite key.
index = Column(Integer, primary_key=True)
date = Column(Date, primary_key=True)
from DbSetup import Base
from sqlalchemy import Column, Date, Integer, Float, String
# ORM mapping of the 'Toots' table: one row per crawled toot, holding its
# text, author, detected language, sentiment label and the model that
# produced the label.
class Toots(Base):
__tablename__ = 'Toots'
# 'extend_existing' lets this declaration extend a 'Toots' table that is already registered in the metadata.
__table_args__ = {'extend_existing': True}
index = Column(Integer, primary_key=True)
model = Column(String(30))
# NOTE(review): declared as Date although TootCrawler supplies full timestamps — confirm the time part is not needed.
datetime = Column(Date)
language = Column(String(3))
sentiment = Column(String(8))
tootId = Column(String(255))
toot = Column(String(600))
userName = Column(String(255))
userId = Column(String(255))
# ORM mapping of the 'SentimentCounts' table: number of toots per date and
# sentiment label (Main.py appends the counts of the previous day).
class SentimentCounts(Base):
__tablename__ = 'SentimentCounts'
__table_args__ = {'extend_existing': True}
# 'index' and 'date' are both flagged primary_key, so they form a composite key.
index = Column(Integer, primary_key=True)
sentimentCount = Column(Integer)
date = Column(Date, primary_key=True)
sentiment = Column(String(8))
# ORM mapping of the 'SentimentMeans' table: one mean sentiment value per date.
class SentimentMeans(Base):
__tablename__ = 'SentimentMeans'
__table_args__ = {'extend_existing': True}
# 'index' and 'date' are both flagged primary_key, so they form a composite key.
index = Column(Integer, primary_key=True)
date = Column(Date, primary_key=True)
# NOTE(review): calculateSentimentMean names this column 'sentimentsMean' (lower-case s) — confirm the case difference is harmless for the database in use.
SentimentsMean = Column(Float)

View file

@ -1,48 +1,48 @@
from langdetect import detect
import pytz
import pandas as pd
import re
from SentiTooter import SentiTooter
from pprint import pprint
class TootCrawler():
    """Fetches toots from the local timeline and labels each one with a sentiment."""

    def __init__(self, mastodonInstance) -> None:
        """Keep the Mastodon client and set up the HTML pattern, sentiment analyzer and timezone."""
        self.mastodonInstance = mastodonInstance
        self.compilePattern = re.compile('<.*?>')
        self.sentiTooter = SentiTooter()
        self.localTimezone = pytz.timezone('Europe/Berlin')

    def getLocalTimeline(self, minId=None):
        """Return one page of the local timeline, restricted by ``minId`` when given."""
        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)

    def cleanhtml(self, raw_html):
        """Strip HTML tags first, then everything from ``http`` up to the next whitespace."""
        cleantext = self.compilePattern.sub('', raw_html)
        cleantext = re.sub(r'http\S+', '', cleantext)
        return cleantext

    def buildTootsDataframe(self, minId=None):
        """Crawl the local timeline and return the analysed toots as a DataFrame.

        Args:
            minId: Toot id passed to the timeline request as ``min_id``;
                ``None`` requests the timeline without that restriction.

        Returns:
            A DataFrame sorted by ``datetime`` with the columns
            ``sentiment``, ``model``, ``toot``, ``datetime``, ``language``,
            ``userName``, ``userId`` and ``tootId``; empty when nothing was
            crawled. Toots whose cleaned text gives langdetect nothing to
            work with (for example media-only toots) are left out.
        """
        # Imported here because the module only imports ``detect``.
        from langdetect import LangDetectException

        allTimelineResults = []
        timelinePagination = self.getLocalTimeline(minId)
        while timelinePagination:
            # extend() avoids copying the accumulated list on every page.
            allTimelineResults.extend(timelinePagination)
            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)

        toots = []
        for i in allTimelineResults:
            content = self.cleanhtml(i.content)
            try:
                language = detect(content)
            except LangDetectException:
                # detect() raises when the text has no usable features;
                # one such toot must not abort the whole crawl.
                continue
            sentiment = self.sentiTooter.analyze(language, content)
            toots.append({
                "sentiment": sentiment[0],
                "model": sentiment[1],
                "toot": content,
                "datetime": i.created_at.astimezone(self.localTimezone),
                "language": language,
                "userName": i.account.display_name,
                "userId": i.account.id,
                "tootId": i.id
            })
        toots.sort(key=lambda item: item.get('datetime'))
        # The caller reads ``.empty`` on the result, so the frame must be returned.
        return pd.DataFrame.from_records(toots)
from langdetect import detect
import pytz
import pandas as pd
import re
from SentiTooter import SentiTooter
from pprint import pprint
class TootCrawler():
    """Fetches toots from the local timeline and labels each one with a sentiment."""

    def __init__(self, mastodonInstance) -> None:
        """Keep the Mastodon client and set up the HTML pattern, sentiment analyzer and timezone."""
        self.mastodonInstance = mastodonInstance
        self.compilePattern = re.compile('<.*?>')
        self.sentiTooter = SentiTooter()
        self.localTimezone = pytz.timezone('Europe/Berlin')

    def getLocalTimeline(self, minId=None):
        """Return one page of the local timeline, restricted by ``minId`` when given."""
        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)

    def cleanhtml(self, raw_html):
        """Strip HTML tags first, then everything from ``http`` up to the next whitespace."""
        cleantext = self.compilePattern.sub('', raw_html)
        cleantext = re.sub(r'http\S+', '', cleantext)
        return cleantext

    def buildTootsDataframe(self, minId=None):
        """Crawl the local timeline and return the analysed toots as a DataFrame.

        Args:
            minId: Toot id passed to the timeline request as ``min_id``;
                ``None`` requests the timeline without that restriction.

        Returns:
            A DataFrame sorted by ``datetime`` with the columns
            ``sentiment``, ``model``, ``toot``, ``datetime``, ``language``,
            ``userName``, ``userId`` and ``tootId``; empty when nothing was
            crawled. Toots whose cleaned text gives langdetect nothing to
            work with (for example media-only toots) are left out.
        """
        # Imported here because the module only imports ``detect``.
        from langdetect import LangDetectException

        allTimelineResults = []
        timelinePagination = self.getLocalTimeline(minId)
        while timelinePagination:
            # extend() avoids copying the accumulated list on every page.
            allTimelineResults.extend(timelinePagination)
            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)

        toots = []
        for i in allTimelineResults:
            content = self.cleanhtml(i.content)
            try:
                language = detect(content)
            except LangDetectException:
                # detect() raises when the text has no usable features;
                # one such toot must not abort the whole crawl.
                continue
            sentiment = self.sentiTooter.analyze(language, content)
            toots.append({
                "sentiment": sentiment[0],
                "model": sentiment[1],
                "toot": content,
                "datetime": i.created_at.astimezone(self.localTimezone),
                "language": language,
                "userName": i.account.display_name,
                "userId": i.account.id,
                "tootId": i.id
            })
        toots.sort(key=lambda item: item.get('datetime'))
        return pd.DataFrame.from_records(toots)

View file

@ -1,8 +1,12 @@
mastodon.py
matplotlib
pandas
sqlalchemy
vader-multi
numpy
pytz
transformers
mastodon.py
matplotlib
pandas
sqlalchemy
vader-multi
langdetect
numpy
pytz
transformers
wheel
germansentiment
scipy