From 2b98565444e6a8e9ead4389941f7ec391566b5e2 Mon Sep 17 00:00:00 2001
From: Robert Nasarek <rbrt.nsrk@posteo.de>
Date: Fri, 27 Jan 2023 21:08:25 +0100
Subject: [PATCH 01/10] made hedonodon server ready

---
 .gitignore                |  23 ++---
 CRUDManager.py            |  96 +++++++++----------
 DbSetup.py                |  23 ++---
 Main.py                   | 196 +++++++++++++++++++-------------------
 MastodonAccountManager.py |  10 +-
 README.md                 |   6 +-
 SentiTooter.py            | 148 ++++++++++++++--------------
 Tables.py                 |  62 ++++++------
 TootCrawler.py            |  94 +++++++++---------
 requirements.txt          |  20 ++--
 10 files changed, 342 insertions(+), 336 deletions(-)

diff --git a/.gitignore b/.gitignore
index d946327..613ca0d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,12 @@
-database.db
-plots
-instance
-__pycache__
-hedonodon_clientcred.secret
-hedonodon_usercred.secret
-.fleet
-test.py
-.idea
-cardiffnlp
-venv
\ No newline at end of file
+database.db
+plots
+instance
+__pycache__
+hedonodon_clientcred.secret
+hedonodon_usercred.secret
+.fleet
+test.py
+.idea
+cardiffnlp
+venv
+logs.txt
diff --git a/CRUDManager.py b/CRUDManager.py
index 0226fbf..4f05098 100644
--- a/CRUDManager.py
+++ b/CRUDManager.py
@@ -1,48 +1,48 @@
-from DbSetup import engine, session, databaseUrl
-import pandas as pd
-from sqlalchemy import desc, select
-from Tables import Toots
-
-
-def calculateSentimentCount():
-    query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
-                FROM Toots
-                GROUP BY DATE(datetime),
-                sentiment
-                HAVING datetime >= DATE("now","-1 day")
-                AND datetime < DATE("now")'''
-    return pd.read_sql(
-        query,
-        databaseUrl,
-        parse_dates=["datetime"]
-    )
-
-def calculateSentimentMean(dataframe):
-    negativeSentimentSum = dataframe[dataframe['sentiment'] == 'negative']['sentimentCount'].sum() * -1
-    positiveSentimentSum = dataframe[dataframe['sentiment'] == 'positive']['sentimentCount'].sum()
-    sentimentSum = dataframe['sentimentCount'].sum()
-    sentimentMean = (negativeSentimentSum + positiveSentimentSum) / sentimentSum
-    sentimentDate = dataframe.loc[0]['date']
-    return pd.DataFrame.from_records(
-        [
-            {
-                'date': sentimentDate,
-                'sentimentsMean': sentimentMean
-            }
-        ]
-    )
-
-class CRUDManager():
-
-    def saveToDatabase(self, dataframe, table:str, useIndex=False):
-        try:
-            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
-        except:
-            print(f'Could not save data to {table}!')
-
-    def loadFromDatabase(self, table:str, indexColumn=None):
-        return pd.read_sql_table(table, databaseUrl, index_col=indexColumn)
-
-    def getLastToot(self):
-        stmt = select(Toots.tootId).order_by(desc('datetime'))
-        return session.scalars(stmt).first()
\ No newline at end of file
+from DbSetup import connection, engine, session, databaseUrl
+import pandas as pd
+from sqlalchemy import desc, select, sql
+from Tables import Toots
+
+
+def calculateSentimentCount():
+    query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
+                FROM Toots
+                GROUP BY DATE(datetime),
+                sentiment
+                HAVING datetime >= DATE("now","-1 day")
+                AND datetime < DATE("now")'''
+    return pd.read_sql(
+        sql.text(query),
+        connection,
+        parse_dates=["datetime"]
+    )
+
+def calculateSentimentMean(dataframe):
+    negativeSentimentSum = dataframe[dataframe['sentiment'] == 'negative']['sentimentCount'].sum() * -1
+    positiveSentimentSum = dataframe[dataframe['sentiment'] == 'positive']['sentimentCount'].sum()
+    sentimentSum = dataframe['sentimentCount'].sum()
+    sentimentMean = (negativeSentimentSum + positiveSentimentSum) / sentimentSum
+    sentimentDate = dataframe.loc[0]['date']
+    return pd.DataFrame.from_records(
+        [
+            {
+                'date': sentimentDate,
+                'sentimentsMean': sentimentMean
+            }
+        ]
+    )
+
+class CRUDManager():
+
+    def saveToDatabase(self, dataframe, table:str, useIndex=False):
+        try:
+            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
+        except:
+            print(f'Could not save data to {table}!')
+
+    def loadFromDatabase(self, table:str, indexColumn=None):
+        return pd.read_sql_table(table, connection, index_col=indexColumn)
+
+    def getLastToot(self):
+        stmt = select(Toots.tootId).order_by(desc('datetime'))
+        return session.scalars(stmt).first()
diff --git a/DbSetup.py b/DbSetup.py
index 1898556..c787928 100644
--- a/DbSetup.py
+++ b/DbSetup.py
@@ -1,11 +1,12 @@
-from sqlalchemy import create_engine
-from sqlalchemy.orm import Session
-from sqlalchemy.ext.declarative import declarative_base
-
-databaseUrl = 'sqlite:///database.db'
-engine = create_engine(databaseUrl, future=True)
-session = Session(engine)
-Base = declarative_base()
-
-def init_db():
-     Base.metadata.create_all(bind=engine)
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session
+from sqlalchemy.ext.declarative import declarative_base
+
+databaseUrl = 'sqlite:///database.db'
+engine = create_engine(databaseUrl, future=True)
+connection = engine.connect()
+session = Session(engine)
+Base = declarative_base()
+
+def init_db():
+     Base.metadata.create_all(bind=engine)
diff --git a/Main.py b/Main.py
index b80b3a6..2b6768a 100644
--- a/Main.py
+++ b/Main.py
@@ -1,98 +1,98 @@
-from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
-from datetime import datetime, date
-from DbSetup import init_db
-import locale
-from MastodonAccountManager import MastodonAccountManager
-import matplotlib.pyplot as plt
-import matplotlib.dates as mdates
-from TootCrawler import TootCrawler
-
-locale.setlocale(locale.LC_TIME, "en_EN.UTF-8")
-init_db()
-
-mastodonAccountManager = MastodonAccountManager()
-mastodonInstance = mastodonAccountManager.instance
-"""
-mastodonInstance.log_in(
-    'USER-EMAIL',
-    'PW',
-    to_file = 'hedonodon_usercred.secret'
-)
-"""
-
-tootCrawler = TootCrawler(mastodonInstance)
-crudManager = CRUDManager()
-
-lastTootId = crudManager.getLastToot()
-tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
-
-if not tootsDataframe.empty:
-    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
-else:
-    print('Nothing changed since last database insert!')
-
-sentimentsYesterday = calculateSentimentCount()
-sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
-
-if not tootsDataframe.empty:
-    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
-    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
-else:
-    print('Nothing changed since last database insert!')
-
-colormap = {
-    'negative': '#ff9999',
-    'neutral': '#ffcc99',
-    "positive": '#99ff99'
-}
-
-todaysColors = []
-for sentiment in sentimentsYesterday['sentiment'].to_numpy():
-    todaysColors.append(colormap[sentiment])
-
-
-
-TodayDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d').strftime('%d.%m.%Y')
-dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
-dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1)
-
-fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))
-
-# Pie chart.
-pieChartlabels = dataframe4PieChart.index.to_numpy()
-pieChart = dataframe4PieChart.plot.pie(
-    ax=axes[0],
-    y='sentimentCount',
-    ylabel="",
-    labels=dataframe4PieChart['sentimentCount'],
-    title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
-    colors=todaysColors,
-    wedgeprops=dict(linewidth=3, edgecolor='w'),
-    startangle=90
-)
-
-axes[0].axis('equal')
-centre_circle = plt.Circle((0, 0), 0.6, fc='white')
-axes[0].add_patch(centre_circle)
-chartBox = axes[0].get_position()
-axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
-
-# Line chart.
-lineChart = dataframe4LineChart.plot.line(
-    ax=axes[1],
-    title='Mean of all sentiments from max positive (1) to min negative (-1)'
-)
-axes[1].grid(True)
-axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
-axes[1].set_ylim([-1, 1])
-axes[1].xaxis.set_major_locator(mdates.MonthLocator())
-axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
-axes[1].xaxis.set_major_formatter(plt.NullFormatter())
-axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
-axes[1].tick_params(which='minor', length=0)
-plotFileUrl = f'./plots/{TodayDate}.png'
-plt.savefig(plotFileUrl)
-
-media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
-mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
-
+from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
+from datetime import datetime, date
+from DbSetup import init_db
+import locale
+from MastodonAccountManager import MastodonAccountManager
+import matplotlib.pyplot as plt
+import matplotlib.dates as mdates
+from TootCrawler import TootCrawler
+
+locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
+init_db()
+
+mastodonAccountManager = MastodonAccountManager()
+mastodonInstance = mastodonAccountManager.instance
+"""
+mastodonInstance.log_in(
+    'USER-EMAIL',
+    'PW',
+    to_file = 'hedonodon_usercred.secret'
+)
+"""
+
+tootCrawler = TootCrawler(mastodonInstance)
+crudManager = CRUDManager()
+
+lastTootId = crudManager.getLastToot()
+tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
+
+if not tootsDataframe.empty:
+    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
+else:
+    print('Nothing changed since last database insert!')
+
+sentimentsYesterday = calculateSentimentCount()
+sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
+
+if not tootsDataframe.empty:
+    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
+    crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
+else:
+    print('Nothing changed since last database insert!')
+
+colormap = {
+    'negative': '#ff9999',
+    'neutral': '#ffcc99',
+    "positive": '#99ff99'
+}
+
+todaysColors = []
+for sentiment in sentimentsYesterday['sentiment'].to_numpy():
+    todaysColors.append(colormap[sentiment])
+
+
+
+TodayDate = datetime.strptime(sentimentsYesterday['date'][0], '%Y-%m-%d').strftime('%d.%m.%Y')
+dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
+dataframe4LineChart = crudManager.loadFromDatabase('SentimentMeans', 'date').drop('index', axis=1)
+
+fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))
+
+# Pie chart.
+pieChartlabels = dataframe4PieChart.index.to_numpy()
+pieChart = dataframe4PieChart.plot.pie(
+    ax=axes[0],
+    y='sentimentCount',
+    ylabel="",
+    labels=dataframe4PieChart['sentimentCount'],
+    title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
+    colors=todaysColors,
+    wedgeprops=dict(linewidth=3, edgecolor='w'),
+    startangle=90
+)
+
+axes[0].axis('equal')
+centre_circle = plt.Circle((0, 0), 0.6, fc='white')
+axes[0].add_patch(centre_circle)
+chartBox = axes[0].get_position()
+axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
+
+# Line chart.
+lineChart = dataframe4LineChart.plot.line(
+    ax=axes[1],
+    title='Mean of all sentiments from max positive (1) to min negative (-1)'
+)
+axes[1].grid(True)
+axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
+axes[1].set_ylim([-1, 1])
+axes[1].xaxis.set_major_locator(mdates.MonthLocator())
+axes[1].xaxis.set_minor_locator(mdates.MonthLocator(bymonthday=15))
+axes[1].xaxis.set_major_formatter(plt.NullFormatter())
+axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
+axes[1].tick_params(which='minor', length=0)
+plotFileUrl = f'./plots/{TodayDate}.png'
+plt.savefig(plotFileUrl)
+
+media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
+mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
+
diff --git a/MastodonAccountManager.py b/MastodonAccountManager.py
index 9c51e54..68d62bb 100644
--- a/MastodonAccountManager.py
+++ b/MastodonAccountManager.py
@@ -1,5 +1,5 @@
-from mastodon import Mastodon
-
-class MastodonAccountManager():
-    def __init__(self):
-        self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
+from mastodon import Mastodon
+
+class MastodonAccountManager():
+    def __init__(self):
+        self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
diff --git a/README.md b/README.md
index fdbe2f7..5440dc1 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# Hedonodon
-I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
-
+# Hedonodon
+I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
+
 More Documentation coming soon!
\ No newline at end of file
diff --git a/SentiTooter.py b/SentiTooter.py
index 26b7a47..00899c2 100644
--- a/SentiTooter.py
+++ b/SentiTooter.py
@@ -1,74 +1,74 @@
-from germansentiment import SentimentModel
-import numpy as np
-from scipy.special import softmax
-from transformers import AutoModelForSequenceClassification
-from transformers import AutoTokenizer
-from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
-
-
-# Preprocess text (username and link placeholders)
-def preprocess(text):
-    new_text = []
-
-    for t in text.split(" "):
-        t = '@user' if t.startswith('@') and len(t) > 1 else t
-        t = 'http' if t.startswith('http') else t
-        new_text.append(t)
-    return " ".join(new_text)
-
-
-class SentiTooter:
-    """"""
-
-    def __init__(self):
-        self.deModel = SentimentModel()
-        self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment"
-        self.enModel, self.enTokenizer = self.initModel()
-        # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt
-        self.labels = ['negative', 'neutral', 'positive']
-        self.sia = SentimentIntensityAnalyzer()
-
-    def analyze(self, language, content):
-        match language:
-            case 'de':
-                sentiment = self.deModel.predict_sentiment([content])
-                sentiment.append('germanSentiment')
-                return sentiment
-            case 'en':
-                text = preprocess(content)
-                encoded_input = self.enTokenizer(text, return_tensors='pt')
-                output = self.enModel(**encoded_input)
-                scores = output[0][0].detach().numpy()
-                scores = softmax(scores)
-                sentimentIndexWithMaxScore = np.argmax(scores)
-                sentimentLabel = self.labels[sentimentIndexWithMaxScore]
-                sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment']
-                return sentiment
-            case _:
-                compound = self.sia.polarity_scores(content)['compound']
-                if compound > (1 / 3):
-                    return ['positive', 'vaderSentiment']
-                elif compound < (-1 / 3):
-                    return ['negative', 'vaderSentiment']
-                else:
-                    return ['neutral', 'vaderSentiment']
-
-
-
-    def initModel(self):
-        # PT
-        tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
-        tokenizer.save_pretrained(self.enModelType)
-        model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
-        model.save_pretrained(self.enModelType)
-        return model, tokenizer
-
-    # # TF
-    # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
-    # model.save_pretrained(MODEL)
-
-    # text = "Good night 😊"
-    # encoded_input = tokenizer(text, return_tensors='tf')
-    # output = model(encoded_input)
-    # scores = output[0][0].numpy()
-    # scores = softmax(scores)
+from germansentiment import SentimentModel
+import numpy as np
+from scipy.special import softmax
+from transformers import AutoModelForSequenceClassification
+from transformers import AutoTokenizer
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
+
+# Preprocess text (username and link placeholders)
+def preprocess(text):
+    new_text = []
+
+    for t in text.split(" "):
+        t = '@user' if t.startswith('@') and len(t) > 1 else t
+        t = 'http' if t.startswith('http') else t
+        new_text.append(t)
+    return " ".join(new_text)
+
+
+class SentiTooter:
+    """"""
+
+    def __init__(self):
+        self.deModel = SentimentModel()
+        self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment"
+        self.enModel, self.enTokenizer = self.initModel()
+        # https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/sentiment/mapping.txt
+        self.labels = ['negative', 'neutral', 'positive']
+        self.sia = SentimentIntensityAnalyzer()
+
+    def analyze(self, language, content):
+        match language:
+            case 'de':
+                sentiment = self.deModel.predict_sentiment([content])
+                sentiment.append('germanSentiment')
+                return sentiment
+            case 'en':
+                text = preprocess(content)
+                encoded_input = self.enTokenizer(text, return_tensors='pt')
+                output = self.enModel(**encoded_input)
+                scores = output[0][0].detach().numpy()
+                scores = softmax(scores)
+                sentimentIndexWithMaxScore = np.argmax(scores)
+                sentimentLabel = self.labels[sentimentIndexWithMaxScore]
+                sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment']
+                return sentiment
+            case _:
+                compound = self.sia.polarity_scores(content)['compound']
+                if compound > (1 / 3):
+                    return ['positive', 'vaderSentiment']
+                elif compound < (-1 / 3):
+                    return ['negative', 'vaderSentiment']
+                else:
+                    return ['neutral', 'vaderSentiment']
+
+
+
+    def initModel(self):
+        # PT
+        tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
+        tokenizer.save_pretrained(self.enModelType)
+        model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
+        model.save_pretrained(self.enModelType)
+        return model, tokenizer
+
+    # # TF
+    # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
+    # model.save_pretrained(MODEL)
+
+    # text = "Good night 😊"
+    # encoded_input = tokenizer(text, return_tensors='tf')
+    # output = model(encoded_input)
+    # scores = output[0][0].numpy()
+    # scores = softmax(scores)
diff --git a/Tables.py b/Tables.py
index 78aa412..c64178a 100644
--- a/Tables.py
+++ b/Tables.py
@@ -1,32 +1,32 @@
-from DbSetup import Base
-from sqlalchemy import Column, Date, Integer, Float, String
-
-class Toots(Base):
-    __tablename__ = 'Toots'
-    __table_args__ = {'extend_existing': True}
-    index = Column(Integer, primary_key=True)
-    model = Column(String(30))
-    datetime = Column(Date)
-    language = Column(String(3))
-    sentiment = Column(String(8))
-    tootId = Column(String(255))
-    toot = Column(String(600))
-    userName = Column(String(255))
-    userId = Column(String(255))
-
-
-
-class SentimentCounts(Base):
-    __tablename__ = 'SentimentCounts'
-    __table_args__ = {'extend_existing': True}
-    index = Column(Integer, primary_key=True)
-    sentimentCount = Column(Integer)
-    date = Column(Date, primary_key=True)
-    sentiment = Column(String(8))
-
-class SentimentMeans(Base):
-    __tablename__ = 'SentimentMeans'
-    __table_args__ = {'extend_existing': True}
-    index = Column(Integer, primary_key=True)
-    date = Column(Date, primary_key=True)
+from DbSetup import Base
+from sqlalchemy import Column, Date, Integer, Float, String
+
+class Toots(Base):
+    __tablename__ = 'Toots'
+    __table_args__ = {'extend_existing': True}
+    index = Column(Integer, primary_key=True)
+    model = Column(String(30))
+    datetime = Column(Date)
+    language = Column(String(3))
+    sentiment = Column(String(8))
+    tootId = Column(String(255))
+    toot = Column(String(600))
+    userName = Column(String(255))
+    userId = Column(String(255))
+
+
+
+class SentimentCounts(Base):
+    __tablename__ = 'SentimentCounts'
+    __table_args__ = {'extend_existing': True}
+    index = Column(Integer, primary_key=True)
+    sentimentCount = Column(Integer)
+    date = Column(Date, primary_key=True)
+    sentiment = Column(String(8))
+
+class SentimentMeans(Base):
+    __tablename__ = 'SentimentMeans'
+    __table_args__ = {'extend_existing': True}
+    index = Column(Integer, primary_key=True)
+    date = Column(Date, primary_key=True)
     SentimentsMean = Column(Float)
\ No newline at end of file
diff --git a/TootCrawler.py b/TootCrawler.py
index 1b081c2..fa131bf 100644
--- a/TootCrawler.py
+++ b/TootCrawler.py
@@ -1,48 +1,48 @@
-from langdetect import detect
-import pytz
-import pandas as pd
-import re
-from SentiTooter import SentiTooter
-from pprint import pprint
-
-class TootCrawler():
-
-    def __init__(self, mastodonInstance) -> None:
-        self.mastodonInstance = mastodonInstance
-        self.compilePattern = re.compile('<.*?>')
-        self.sentiTooter = SentiTooter()
-        self.localTimezone = pytz.timezone('Europe/Berlin')
-
-    def getLocalTimeline(self, minId=None):
-        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
-
-    def cleanhtml(self, raw_html):
-        cleantext = re.sub(self.compilePattern, '', raw_html)
-        cleantext = re.sub(r'http\S+', '', cleantext)
-        return cleantext
-
-    def buildTootsDataframe(self, minId=None):
-        toots = []
-        allTimelineResults = []
-        timelinePagination = self.getLocalTimeline(minId)
-
-        while timelinePagination:
-            allTimelineResults = allTimelineResults + timelinePagination
-            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
-        for i in allTimelineResults:
-            content = self.cleanhtml(i.content)
-            language = detect(content)
-            sentiment = self.sentiTooter.analyze(language, content)
-            toot = {
-                "sentiment": sentiment[0],
-                "model": sentiment[1],
-                "toot": content,
-                "datetime": i.created_at.astimezone(self.localTimezone),
-                "language": language,
-                "userName": i.account.display_name,
-                "userId": i.account.id,
-                "tootId": i.id
-            }
-            toots.append(toot)
-        toots.sort(key=lambda item:item.get('datetime'))
+from langdetect import detect
+import pytz
+import pandas as pd
+import re
+from SentiTooter import SentiTooter
+from pprint import pprint
+
+class TootCrawler():
+
+    def __init__(self, mastodonInstance) -> None:
+        self.mastodonInstance = mastodonInstance
+        self.compilePattern = re.compile('<.*?>')
+        self.sentiTooter = SentiTooter()
+        self.localTimezone = pytz.timezone('Europe/Berlin')
+
+    def getLocalTimeline(self, minId=None):
+        return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
+
+    def cleanhtml(self, raw_html):
+        cleantext = re.sub(self.compilePattern, '', raw_html)
+        cleantext = re.sub(r'http\S+', '', cleantext)
+        return cleantext
+
+    def buildTootsDataframe(self, minId=None):
+        toots = []
+        allTimelineResults = []
+        timelinePagination = self.getLocalTimeline(minId)
+
+        while timelinePagination:
+            allTimelineResults = allTimelineResults + timelinePagination
+            timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
+        for i in allTimelineResults:
+            content = self.cleanhtml(i.content)
+            language = detect(content)
+            sentiment = self.sentiTooter.analyze(language, content)
+            toot = {
+                "sentiment": sentiment[0],
+                "model": sentiment[1],
+                "toot": content,
+                "datetime": i.created_at.astimezone(self.localTimezone),
+                "language": language,
+                "userName": i.account.display_name,
+                "userId": i.account.id,
+                "tootId": i.id
+            }
+            toots.append(toot)
+        toots.sort(key=lambda item:item.get('datetime'))
         return pd.DataFrame.from_records(toots)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 842b542..47f6d81 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,12 @@
-mastodon.py
-matplotlib
-pandas
-sqlalchemy
-vader-multi
-numpy
-pytz
-transformers
\ No newline at end of file
+mastodon.py
+matplotlib
+pandas
+sqlalchemy
+vader-multi
+langdetect
+numpy
+pytz
+transformers
+wheel
+germansentiment
+scipy

From 79f54079f7dabb9e526f41ca28401975d0155215 Mon Sep 17 00:00:00 2001
From: Robert Nasarek <rbrt.nsrk@posteo.de>
Date: Tue, 31 Jan 2023 17:51:06 +0100
Subject: [PATCH 02/10] fixed crash when toot language cannot be detected

---
 Main.py        | 2 +-
 TootCrawler.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/Main.py b/Main.py
index 2b6768a..e6b7a3a 100644
--- a/Main.py
+++ b/Main.py
@@ -80,7 +80,7 @@ axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
 # Line chart.
 lineChart = dataframe4LineChart.plot.line(
     ax=axes[1],
-    title='Mean of all sentiments from max positive (1) to min negative (-1)'
+    title='"Mean" of all sentiments. Please note that the sentiments are classified in a nominal scale: positive (1), neutral (0), and negative (-1) and NOT with compounds. Therefore this value indicates a tendency and not a correct statistical value.'
 )
 axes[1].grid(True)
 axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
diff --git a/TootCrawler.py b/TootCrawler.py
index fa131bf..a657a5d 100644
--- a/TootCrawler.py
+++ b/TootCrawler.py
@@ -31,7 +31,10 @@ class TootCrawler():
             timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
         for i in allTimelineResults:
             content = self.cleanhtml(i.content)
-            language = detect(content)
+            try:
+                language = detect(content)
+            except:
+                language = None
             sentiment = self.sentiTooter.analyze(language, content)
             toot = {
                 "sentiment": sentiment[0],

From 8f7c57808779945a850fb7af9e7e1834dd7d7dfb Mon Sep 17 00:00:00 2001
From: Robert Nasarek <rbrt.nsrk@posteo.de>
Date: Wed, 15 Mar 2023 11:16:35 +0100
Subject: [PATCH 03/10] shortened chart title, moved note to image description

---
 Main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Main.py b/Main.py
index e6b7a3a..5dd227d 100644
--- a/Main.py
+++ b/Main.py
@@ -80,7 +80,7 @@ axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
 # Line chart.
 lineChart = dataframe4LineChart.plot.line(
     ax=axes[1],
-    title='"Mean" of all sentiments. Please note that the sentiments are classified in a nominal scale: positive (1), neutral (0), and negative (-1) and NOT with compounds. Therefore this value indicates a tendency and not a correct statistical value.'
+    title='"Mean" of sentiments, calculated from nominal values, pos(1), neu (0), neg (-1)!'
 )
 axes[1].grid(True)
 axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
@@ -93,6 +93,6 @@ axes[1].tick_params(which='minor', length=0)
 plotFileUrl = f'./plots/{TodayDate}.png'
 plt.savefig(plotFileUrl)
 
-media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
+media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}. Please note that the sentiments are classified in a nominal scale: positive (1), neutral (0), and negative (-1) and NOT with compounds. Therefore the mean indicates a tendency and not a correct statistical value.")
 mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
 

From 3b677e5713621639b1131140c48f732a3668b4f0 Mon Sep 17 00:00:00 2001
From: rnsrk <rbrt.nsrk@posteo.de>
Date: Wed, 15 Mar 2023 13:21:44 +0100
Subject: [PATCH 04/10] work in progress on word count

---
 CRUDManager.py   | 12 +++++++++++-
 Main.py          |  8 ++++++--
 SentiTooter.py   | 17 +++--------------
 requirements.txt |  5 ++++-
 4 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/CRUDManager.py b/CRUDManager.py
index 0226fbf..e18a575 100644
--- a/CRUDManager.py
+++ b/CRUDManager.py
@@ -3,7 +3,6 @@ import pandas as pd
 from sqlalchemy import desc, select
 from Tables import Toots
 
-
 def calculateSentimentCount():
     query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
                 FROM Toots
@@ -32,6 +31,17 @@ def calculateSentimentMean(dataframe):
         ]
     )
 
+def calculateWordCount():
+    query = f'''SELECT DATE(datetime) as date, language, sentiment, toot
+                FROM Toots
+                WHERE datetime >= DATE("now","-1 day")
+                AND datetime < DATE("now")'''
+    return pd.read_sql(
+        query,
+        engine,
+        parse_dates=["datetime"]
+    )
+
 class CRUDManager():
 
     def saveToDatabase(self, dataframe, table:str, useIndex=False):
diff --git a/Main.py b/Main.py
index a64a63c..e43e2ca 100644
--- a/Main.py
+++ b/Main.py
@@ -1,4 +1,4 @@
-from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
+from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean, calculateWordCount
 from datetime import datetime, date
 from DbSetup import init_db
 import locale
@@ -25,12 +25,16 @@ crudManager = CRUDManager()
 
 lastTootId = crudManager.getLastToot()
 tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
-exit()
+
 if not tootsDataframe.empty:
     crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
 else:
     print('Nothing changed since last database insert!')
 
+wordCounts = calculateWordCount()
+print(wordCounts);
+print("exit programm")
+exit()
 sentimentsYesterday = calculateSentimentCount()
 sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
 
diff --git a/SentiTooter.py b/SentiTooter.py
index 9626078..6aa1f92 100644
--- a/SentiTooter.py
+++ b/SentiTooter.py
@@ -41,15 +41,15 @@ class SentiTooter:
                 output = self.enModel(**encoded_input)
                 scores = output[0][0].detach().numpy()
                 scores = softmax(scores)
-                print(scores)
+                #print(scores)
                 sentimentIndexWithMaxScore = np.argmax(scores)
                 sentimentLabel = self.labels[sentimentIndexWithMaxScore]
                 sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment', max(scores)]
-                print(sentiment)
+                #print(sentiment)
                 return sentiment
             case _:
                 compound = self.sia.polarity_scores(content)['compound']
-                print(self.sia.polarity_scores(content), 'vaderSentiment')
+                #print(self.sia.polarity_scores(content), 'vaderSentiment')
                 if compound > (1 / 3):
                     return ['positive', 'vaderSentiment']
                 elif compound < (-1 / 3):
@@ -58,7 +58,6 @@ class SentiTooter:
                     return ['neutral', 'vaderSentiment']
 
 
-
     def initModel(self):
         # PT
         tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
@@ -66,13 +65,3 @@ class SentiTooter:
         model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
         model.save_pretrained(self.enModelType)
         return model, tokenizer
-
-    # # TF
-    # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
-    # model.save_pretrained(MODEL)
-
-    # text = "Good night 😊"
-    # encoded_input = tokenizer(text, return_tensors='tf')
-    # output = model(encoded_input)
-    # scores = output[0][0].numpy()
-    # scores = softmax(scores)
diff --git a/requirements.txt b/requirements.txt
index 842b542..d280535 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,4 +5,7 @@ sqlalchemy
 vader-multi
 numpy
 pytz
-transformers
\ No newline at end of file
+transformers
+langdetect
+germansentiment
+scipy
\ No newline at end of file

From 6a8caac29efed1bde02575f6804ec650d3fe02f4 Mon Sep 17 00:00:00 2001
From: rnsrk <rbrt.nsrk@posteo.de>
Date: Wed, 15 Mar 2023 14:27:07 +0100
Subject: [PATCH 05/10] implement rough wordcount

---
 CRUDManager.py   |  8 ++++----
 Main.py          |  8 ++++++--
 SentiTooter.py   | 38 +++++++++++++++++++++++++++++++++++---
 requirements.txt |  2 ++
 4 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/CRUDManager.py b/CRUDManager.py
index ea7e7e5..dccdf00 100644
--- a/CRUDManager.py
+++ b/CRUDManager.py
@@ -31,14 +31,14 @@ def calculateSentimentMean(dataframe):
         ]
     )
 
-def calculateWordCount():
-    query = f'''SELECT DATE(datetime) as date, language, sentiment, toot
+def getYesterdaysToots():
+    query = f'''SELECT datetime as date, language, sentiment, toot
                 FROM Toots
                 WHERE datetime >= DATE("now","-1 day")
                 AND datetime < DATE("now")'''
     return pd.read_sql(
-        query,
-        engine,
+        sql.text(query),
+        connection,
         parse_dates=["datetime"]
     )
 
diff --git a/Main.py b/Main.py
index 56ba6b7..2af6e60 100644
--- a/Main.py
+++ b/Main.py
@@ -1,4 +1,4 @@
-from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean, calculateWordCount
+from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean, getYesterdaysToots
 from datetime import datetime, date
 from DbSetup import init_db
 import locale
@@ -6,6 +6,7 @@ from MastodonAccountManager import MastodonAccountManager
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 from TootCrawler import TootCrawler
+from SentiTooter import translateToots, countWords
 
 locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
 init_db()
@@ -31,7 +32,10 @@ if not tootsDataframe.empty:
 else:
     print('Nothing changed since last database insert!')
 
-wordCounts = calculateWordCount()
+yesterdaysToots = getYesterdaysToots()
+translatedToots = translateToots(yesterdaysToots)
+tootsSeries = translatedToots.toot
+wordCounts = countWords(tootsSeries.str.cat(sep=' '), 10)
 print(wordCounts);
 print("exit programm")
 exit()
diff --git a/SentiTooter.py b/SentiTooter.py
index 6aa1f92..d5f22ef 100644
--- a/SentiTooter.py
+++ b/SentiTooter.py
@@ -4,15 +4,17 @@ from scipy.special import softmax
 from transformers import AutoModelForSequenceClassification
 from transformers import AutoTokenizer
 from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
-
+from deep_translator import GoogleTranslator
+import spacy
+from collections import Counter
 
 # Preprocess text (username and link placeholders)
 def preprocess(text):
     new_text = []
 
     for t in text.split(" "):
-        t = '@user' if t.startswith('@') and len(t) > 1 else t
-        t = 'http' if t.startswith('http') else t
+        t = '' if t.startswith('@') and len(t) > 1 else t
+        t = '' if t.startswith('http') else t
         new_text.append(t)
     return " ".join(new_text)
 
@@ -65,3 +67,33 @@ class SentiTooter:
         model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
         model.save_pretrained(self.enModelType)
         return model, tokenizer
+
+def translateToots(yesterdaysToots):
+    yesterdaysTootsTranslated = yesterdaysToots
+    for index, row in yesterdaysTootsTranslated.iterrows():
+        if (row['language'] != 'de'):
+            try:
+                yesterdaysTootsTranslated.at[index,'toot'] = translateToot(row['language'], row['toot'])
+                yesterdaysTootsTranslated.at[index,'language'] = 'de'
+            except:
+                yesterdaysTootsTranslated.drop(index)
+    return yesterdaysTootsTranslated
+
+def translateToot(language, toot):
+    content = preprocess(toot)
+    return GoogleTranslator(source=language, target='de').translate(content)
+
+def countWords(concatedToots, count):
+    nlp = spacy.load('de_core_news_sm')
+    doc = nlp(concatedToots)
+
+    # noun tokens that arent stop words or punctuations
+    nouns = [token.text
+            for token in doc
+            if (not token.is_stop and
+                not token.is_punct and
+                token.pos_ == "NOUN")]
+
+    # five most common noun tokens
+    noun_freq = Counter(nouns)
+    return noun_freq.most_common(count)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 2cf3aab..bc6906e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,3 +10,5 @@ transformers
 wheel
 germansentiment
 scipy
+deep_translator
+spacy
\ No newline at end of file

From 4479bd24293143da9a9ea9439c9443afbe1f9997 Mon Sep 17 00:00:00 2001
From: rnsrk <rbrt.nsrk@posteo.de>
Date: Wed, 15 Mar 2023 16:02:47 +0100
Subject: [PATCH 06/10] implement word counts.

---
 Main.py        | 41 ++++++++++++++++++++++++++++-------------
 SentiTooter.py | 24 +++++++++++++++++++-----
 2 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/Main.py b/Main.py
index 2af6e60..2e7908f 100644
--- a/Main.py
+++ b/Main.py
@@ -6,11 +6,12 @@ from MastodonAccountManager import MastodonAccountManager
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 from TootCrawler import TootCrawler
-from SentiTooter import translateToots, countWords
+from SentiTooter import translateToots, createWordCountPerSentiment
 
 locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
 init_db()
 
+print('Initialize Mastodon...')
 mastodonAccountManager = MastodonAccountManager()
 mastodonInstance = mastodonAccountManager.instance
 """
@@ -20,34 +21,47 @@ mastodonInstance.log_in(
     to_file = 'hedonodon_usercred.secret'
 )
 """
+print('done!')
 
+print('Fetching recent toots...')
 tootCrawler = TootCrawler(mastodonInstance)
 crudManager = CRUDManager()
-
 lastTootId = crudManager.getLastToot()
 tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
+print('done!')
 
+print('Save toots to database...')
 if not tootsDataframe.empty:
     crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
+    print('done!')
 else:
-    print('Nothing changed since last database insert!')
+    print('nothing changed since last database insert!')
 
+print('Calculate word counts...')
 yesterdaysToots = getYesterdaysToots()
 translatedToots = translateToots(yesterdaysToots)
-tootsSeries = translatedToots.toot
-wordCounts = countWords(tootsSeries.str.cat(sep=' '), 10)
-print(wordCounts);
-print("exit programm")
-exit()
-sentimentsYesterday = calculateSentimentCount()
-sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
+wordCountsPerSentiment = createWordCountPerSentiment(translatedToots)
+print('done!')
 
+print(wordCountsPerSentiment);
+
+print('Calculate sentiment counts...')
+sentimentsYesterday = calculateSentimentCount()
+print('done!')
+
+print('Calculate sentiment mean...')
+sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
+print('done!')
+
+print('Save calculations to database...')
 if not tootsDataframe.empty:
     crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='SentimentCounts', useIndex=True)
     crudManager.saveToDatabase(dataframe=sentimentMeansYesterday, table='SentimentMeans', useIndex=True)
+    print('done!')
 else:
-    print('Nothing changed since last database insert!')
+    print('nothing changed since last database insert!')
 
+print('Create figure...')
 colormap = {
     'negative': '#ff9999',
     'neutral': '#ffcc99',
@@ -100,7 +114,8 @@ axes[1].xaxis.set_minor_formatter(mdates.DateFormatter('%h'))
 axes[1].tick_params(which='minor', length=0)
 plotFileUrl = f'./plots/{TodayDate}.png'
 plt.savefig(plotFileUrl)
+print('done!')
 
-#media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
-#mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
+media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
+mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.\nWord counts per sentiment:\n{wordCountsPerSentiment}', media_ids=media, language='en')
 
diff --git a/SentiTooter.py b/SentiTooter.py
index d5f22ef..28a5623 100644
--- a/SentiTooter.py
+++ b/SentiTooter.py
@@ -71,20 +71,20 @@ class SentiTooter:
 def translateToots(yesterdaysToots):
     yesterdaysTootsTranslated = yesterdaysToots
     for index, row in yesterdaysTootsTranslated.iterrows():
-        if (row['language'] != 'de'):
+        if (row['language'] != 'en'):
             try:
                 yesterdaysTootsTranslated.at[index,'toot'] = translateToot(row['language'], row['toot'])
-                yesterdaysTootsTranslated.at[index,'language'] = 'de'
+                yesterdaysTootsTranslated.at[index,'language'] = 'en'
             except:
                 yesterdaysTootsTranslated.drop(index)
     return yesterdaysTootsTranslated
 
 def translateToot(language, toot):
     content = preprocess(toot)
-    return GoogleTranslator(source=language, target='de').translate(content)
+    return GoogleTranslator(source=language, target='en').translate(content)
 
 def countWords(concatedToots, count):
-    nlp = spacy.load('de_core_news_sm')
+    nlp = spacy.load('en_core_web_md')
     doc = nlp(concatedToots)
 
     # noun tokens that arent stop words or punctuations
@@ -96,4 +96,18 @@ def countWords(concatedToots, count):
 
     # five most common noun tokens
     noun_freq = Counter(nouns)
-    return noun_freq.most_common(count)
\ No newline at end of file
+    return noun_freq.most_common(count)
+
+def createWordCountPerSentiment(translatedToots):
+    sentimentList = []
+    for sentiment in ['positive', 'neutral', 'negative']:
+        tootsSeries = translatedToots[translatedToots['sentiment'] == sentiment].toot
+        wordCounts = countWords(tootsSeries.str.cat(sep=' '), 5)
+        countList = []
+        for count in wordCounts:
+             countList.append(str(count[0]) + ' (' + str(count[1]) + ')')
+        list2String = ', '.join(countList)
+        sentimentString = sentiment + ': ' + list2String
+        sentimentList.append(sentimentString)
+    wordCountsPerSentiments = '\n'.join(sentimentList)
+    return wordCountsPerSentiments
\ No newline at end of file

From bc842244c728345977e82f44534b2203809f809a Mon Sep 17 00:00:00 2001
From: rnsrk <rbrt.nsrk@posteo.de>
Date: Fri, 17 Mar 2023 20:06:01 +0100
Subject: [PATCH 07/10] add code documentation

---
 CRUDManager.py            |  73 +++++++++++++++++++++++--
 DbSetup.py                |   6 ++
 Main.py                   |  27 +++++++--
 MastodonAccountManager.py |   2 +
 SentiTooter.py            | 112 +++++++++++++++++++++++++++++++++-----
 Tables.py                 |   4 +-
 TootCrawler.py            |  68 +++++++++++++++++++++--
 7 files changed, 261 insertions(+), 31 deletions(-)

diff --git a/CRUDManager.py b/CRUDManager.py
index dccdf00..cd0c7e0 100644
--- a/CRUDManager.py
+++ b/CRUDManager.py
@@ -3,7 +3,19 @@ import pandas as pd
 from sqlalchemy import desc, select, sql
 from Tables import Toots
 
+from pandas.core.api import (
+    DataFrame)
+
 def calculateSentimentCount():
+    """Calculates the frequencies of the sentiments.
+
+    Returns
+    -------
+    DataFrame
+        Containing date (YY-MM-DD), sentiment (positive, neutral, negative),
+        and sentimentCount.
+    """
+
     query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
                 FROM Toots
                 GROUP BY DATE(datetime),
@@ -16,7 +28,18 @@ def calculateSentimentCount():
         parse_dates=["datetime"]
     )
 
-def calculateSentimentMean(dataframe):
+def calculateSentimentMean(dataframe:DataFrame) -> DataFrame:
+    """Calculates the mean of the sentiments.
+
+    Parameters
+    -------
+        dataframe: DataFrame
+
+    Returns
+    -------
+        Dataframe
+        Containing date (YY-MM-DD), sentimentsMean.
+    """
     negativeSentimentSum = dataframe[dataframe['sentiment'] == 'negative']['sentimentCount'].sum() * -1
     positiveSentimentSum = dataframe[dataframe['sentiment'] == 'positive']['sentimentCount'].sum()
     sentimentSum = dataframe['sentimentCount'].sum()
@@ -31,7 +54,14 @@ def calculateSentimentMean(dataframe):
         ]
     )
 
-def getYesterdaysToots():
+def getYesterdaysToots() -> DataFrame:
+    """Fetches yesterdays toots from database.
+
+    Returns
+    -------
+        pd.Dataframe
+        Containing date (YY-MM-DD), language, sentiment, toot.
+    """
     query = f'''SELECT datetime as date, language, sentiment, toot
                 FROM Toots
                 WHERE datetime >= DATE("now","-1 day")
@@ -43,16 +73,49 @@ def getYesterdaysToots():
     )
 
 class CRUDManager():
+    """Class for database operations"""
 
-    def saveToDatabase(self, dataframe, table:str, useIndex=False):
+    def saveToDatabase(self, dataframe:DataFrame, table:str, useIndex=False):
+        """Saves dataframe to database.
+
+        Parameters
+        -------
+            dataframe: DataFrame
+                Input dataframe.
+            table: str
+                Table, where to save the data.
+            useIndex: boolean
+                Should the index of the dataframe be used as index for
+                the database table?
+        """
         try:
             dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
         except:
             print(f'Could not save data to {table}!')
 
-    def loadFromDatabase(self, table:str, indexColumn=None):
+    def loadFromDatabase(self, table:str, indexColumn=None) -> DataFrame:
+        """Load a table into a dataframe.
+
+        Parameters
+        -------
+            table: str
+                Table to load the data from.
+            indexColumn: str | None
+                Name of the table column to use as index of
+                the dataframe, or None to not set one.
+        Returns
+        -------
+            DataFrame
+        """
         return pd.read_sql_table(table, connection, index_col=indexColumn)
 
-    def getLastToot(self):
+    def getLastToot(self) -> str:
+        """Query the last toot id from database.
+
+        Returns
+        -------
+            str
+            A toot id.
+        """
         stmt = select(Toots.tootId).order_by(desc('datetime'))
         return session.scalars(stmt).first()
diff --git a/DbSetup.py b/DbSetup.py
index c787928..b6179f7 100644
--- a/DbSetup.py
+++ b/DbSetup.py
@@ -1,3 +1,7 @@
+"""Script to initialize the database.
+     Serves database url, engine, connection and session.
+"""
+
 from sqlalchemy import create_engine
 from sqlalchemy.orm import Session
 from sqlalchemy.ext.declarative import declarative_base
@@ -9,4 +13,6 @@ session = Session(engine)
 Base = declarative_base()
 
 def init_db():
+     """Initialize the database.
+     """
      Base.metadata.create_all(bind=engine)
diff --git a/Main.py b/Main.py
index 2e7908f..5bb6532 100644
--- a/Main.py
+++ b/Main.py
@@ -1,3 +1,19 @@
+"""
+Hedonodon toot sentiment analyzer.
+
+This program fetches toots from the fedihum.org Mastodon instance, calculates
+the frequencies of the sentiments (positive, neutral, negative) and the mean
+from these nominal values (this is not statistically correct (;-_-)!, but
+not all analyzers return compounds).
+It also calculates the word count of the nouns per sentiment.
+
+It uses germansentiment for german toots, twitter-roberta-base-sentiment for
+english toots, and vaderSentiment for other languages.
+
+For the word counts I translate the toots to english with the GoogleTranslator
+first.
+"""
+
 from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean, getYesterdaysToots
 from datetime import datetime, date
 from DbSetup import init_db
@@ -6,7 +22,7 @@ from MastodonAccountManager import MastodonAccountManager
 import matplotlib.pyplot as plt
 import matplotlib.dates as mdates
 from TootCrawler import TootCrawler
-from SentiTooter import translateToots, createWordCountPerSentiment
+from SentiTooter import translateToots, createWordFrequenciesPerSentiment
 
 locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
 init_db()
@@ -40,7 +56,7 @@ else:
 print('Calculate word counts...')
 yesterdaysToots = getYesterdaysToots()
 translatedToots = translateToots(yesterdaysToots)
-wordCountsPerSentiment = createWordCountPerSentiment(translatedToots)
+wordCountsPerSentiment = createWordFrequenciesPerSentiment(translatedToots)
 print('done!')
 
 print(wordCountsPerSentiment);
@@ -116,6 +132,7 @@ plotFileUrl = f'./plots/{TodayDate}.png'
 plt.savefig(plotFileUrl)
 print('done!')
 
-media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
-mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.\nWord counts per sentiment:\n{wordCountsPerSentiment}', media_ids=media, language='en')
-
+print('Send toot...')
+#media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the sentiment mean up to {TodayDate}.")
+#mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.\nWord counts per sentiment:\n{wordCountsPerSentiment}', media_ids=media, language='en')
+print('done!')
diff --git a/MastodonAccountManager.py b/MastodonAccountManager.py
index 68d62bb..957ac4e 100644
--- a/MastodonAccountManager.py
+++ b/MastodonAccountManager.py
@@ -1,5 +1,7 @@
 from mastodon import Mastodon
 
 class MastodonAccountManager():
+    """Initialize the Mastodon account.
+    """
     def __init__(self):
         self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
diff --git a/SentiTooter.py b/SentiTooter.py
index 28a5623..b5c1d6a 100644
--- a/SentiTooter.py
+++ b/SentiTooter.py
@@ -1,4 +1,5 @@
 from germansentiment import SentimentModel
+from pandas import DataFrame
 import numpy as np
 from scipy.special import softmax
 from transformers import AutoModelForSequenceClassification
@@ -9,7 +10,18 @@ import spacy
 from collections import Counter
 
 # Preprocess text (username and link placeholders)
-def preprocess(text):
+def preprocess(text:str) -> str:
+    """Removes tags and urls from text.
+
+    Parameters
+    ------
+        text: str
+        The raw toot from Mastodon.
+    Returns
+    ------
+        str
+        The cleaned text.
+    """
     new_text = []
 
     for t in text.split(" "):
@@ -20,9 +32,12 @@ def preprocess(text):
 
 
 class SentiTooter:
-    """"""
+    """Class to analyze the toots.
+    """
 
     def __init__(self):
+        """Initialize the sentiment models and labels.
+        """
         self.deModel = SentimentModel()
         self.enModelType = f"cardiffnlp/twitter-roberta-base-sentiment"
         self.enModel, self.enTokenizer = self.initModel()
@@ -30,7 +45,20 @@ class SentiTooter:
         self.labels = ['negative', 'neutral', 'positive']
         self.sia = SentimentIntensityAnalyzer()
 
-    def analyze(self, language, content):
+    def analyze(self, language:str, content:str) -> list[str, str, float]:
+        """Analyzes the sentiments of the toots.
+
+        Parameters
+        ------
+            language: str
+            The language tag of the toot.
+            content: str
+            The toot content.
+        Returns
+        ------
+            list[str, str, float]
+            A list with the sentiment, analyzer type, and sentiment score.
+        """
         match language:
             case 'de':
                 sentimentList, probabilitiesList = self.deModel.predict_sentiment([content], output_probabilities=True)
@@ -61,6 +89,13 @@ class SentiTooter:
 
 
     def initModel(self):
+        """Initialize the english models.
+
+        Returns
+        ------
+            tuple
+                The pretrained model and tokenizer.
+        """
         # PT
         tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
         tokenizer.save_pretrained(self.enModelType)
@@ -68,7 +103,14 @@ class SentiTooter:
         model.save_pretrained(self.enModelType)
         return model, tokenizer
 
-def translateToots(yesterdaysToots):
+def translateToots(yesterdaysToots:DataFrame) -> DataFrame:
+    """Translates all toots to english.
+
+    Returns
+    ------
+        Dataframe
+        Containing the english translated toots.
+    """
     yesterdaysTootsTranslated = yesterdaysToots
     for index, row in yesterdaysTootsTranslated.iterrows():
         if (row['language'] != 'en'):
@@ -79,11 +121,39 @@ def translateToots(yesterdaysToots):
                 yesterdaysTootsTranslated.drop(index)
     return yesterdaysTootsTranslated
 
-def translateToot(language, toot):
+def translateToot(language:str, toot:str) -> str:
+    """Translate a toot into English.
+
+    Parameters
+    ------
+        language:str
+        The language of the toot.
+        toot: str
+        The toot content.
+
+    Returns
+    ------
+        str
+        The toot translated into English.
+    """
     content = preprocess(toot)
     return GoogleTranslator(source=language, target='en').translate(content)
 
-def countWords(concatedToots, count):
+def countWords(concatedToots: str, number: int) -> list:
+    """Counts the word frequencies in all toots of a given sentiment.
+
+    Parameters
+    ------
+        concatedToots: str
+        All toots from a sentiment.
+        number: int
+        Number of words to calculate word frequencies.
+
+    Returns
+    ------
+        list
+        List containing tuple of word and word frequency.
+    """
     nlp = spacy.load('en_core_web_md')
     doc = nlp(concatedToots)
 
@@ -96,18 +166,30 @@ def countWords(concatedToots, count):
 
     # five most common noun tokens
     noun_freq = Counter(nouns)
-    return noun_freq.most_common(count)
+    return noun_freq.most_common(number)
 
-def createWordCountPerSentiment(translatedToots):
+def createWordFrequenciesPerSentiment(translatedToots:DataFrame) -> str:
+    """Count all word frequencies of all toots per sentiment.
+
+    Parameters
+    ------
+        translatedToots: DataFrame
+        The dataframe with all toots in english.
+
+    Returns
+    ------
+        str
+        Containing words and word counts per sentiment.
+    """
     sentimentList = []
     for sentiment in ['positive', 'neutral', 'negative']:
         tootsSeries = translatedToots[translatedToots['sentiment'] == sentiment].toot
-        wordCounts = countWords(tootsSeries.str.cat(sep=' '), 5)
-        countList = []
-        for count in wordCounts:
-             countList.append(str(count[0]) + ' (' + str(count[1]) + ')')
-        list2String = ', '.join(countList)
+        wordFrequencies = countWords(tootsSeries.str.cat(sep=' '), 5)
+        FrequenciesList = []
+        for Frequencies in wordFrequencies:
+             FrequenciesList.append(str(Frequencies[0]) + ' (' + str(Frequencies[1]) + ')')
+        list2String = ', '.join(FrequenciesList)
         sentimentString = sentiment + ': ' + list2String
         sentimentList.append(sentimentString)
-    wordCountsPerSentiments = '\n'.join(sentimentList)
-    return wordCountsPerSentiments
\ No newline at end of file
+    wordFrequenciessPerSentiments = '\n'.join(sentimentList)
+    return wordFrequenciessPerSentiments
\ No newline at end of file
diff --git a/Tables.py b/Tables.py
index c64178a..071179a 100644
--- a/Tables.py
+++ b/Tables.py
@@ -1,3 +1,5 @@
+"""This script contains the table definitions for the database."""
+
 from DbSetup import Base
 from sqlalchemy import Column, Date, Integer, Float, String
 
@@ -14,8 +16,6 @@ class Toots(Base):
     userName = Column(String(255))
     userId = Column(String(255))
 
-
-
 class SentimentCounts(Base):
     __tablename__ = 'SentimentCounts'
     __table_args__ = {'extend_existing': True}
diff --git a/TootCrawler.py b/TootCrawler.py
index a657a5d..67cd722 100644
--- a/TootCrawler.py
+++ b/TootCrawler.py
@@ -1,27 +1,87 @@
 from langdetect import detect
 import pytz
 import pandas as pd
+from pandas import DataFrame
 import re
 from SentiTooter import SentiTooter
 from pprint import pprint
 
 class TootCrawler():
+    """Class to fetch the recent toots from fedihum.org."""
 
-    def __init__(self, mastodonInstance) -> None:
+    def __init__(self, mastodonInstance: any) -> None:
+        """Initialize the Mastodon instance and depending classes.
+
+        Parameters
+        ------
+            mastodonInstance: any
+                The initialized Mastodon instance.
+        """
         self.mastodonInstance = mastodonInstance
         self.compilePattern = re.compile('<.*?>')
         self.sentiTooter = SentiTooter()
         self.localTimezone = pytz.timezone('Europe/Berlin')
 
-    def getLocalTimeline(self, minId=None):
+    def getLocalTimeline(self, minId=None) -> any:
+        """Receive the local timeline.
+
+        Parameters
+        ------
+            minId: str | None
+                The last fetched toot id from the database.
+
+        Returns
+        ------
+            any
+                The local Mastodon timeline from fedihum.org.
+        """
         return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
 
-    def cleanhtml(self, raw_html):
+    def cleanhtml(self, raw_html:str) -> str:
+        """Remove HTML tags and http URLs from a toot.
+
+        Parameters
+        ------
+            raw_html: str
+            The toot content.
+        Returns
+        ------
+            str:
+            The cleaned toot content.
+        """
         cleantext = re.sub(self.compilePattern, '', raw_html)
         cleantext = re.sub(r'http\S+', '', cleantext)
         return cleantext
 
-    def buildTootsDataframe(self, minId=None):
+    def buildTootsDataframe(self, minId=None) -> DataFrame:
+        """Parse fetched toots from Mastodon to dataframe.
+
+        Parameters
+        ------
+            minId: str | None
+            The id of the last fetched toot.
+
+        Returns
+        ------
+            DataFrame
+            A Dataframe containing
+            sentiment: str
+                The sentiment (positive, neutral, negative)
+            model: str
+                The used sentiment model.
+            toot: str
+                The content of the toot.
+            datetime: datetime
+                The datetime of the toot.
+            language: str
+                The language flag of the toot.
+            userName: str
+                The user name of the toot.
+            userId: str
+                The user id.
+            tootId: str
+                The toot id.
+        """
         toots = []
         allTimelineResults = []
         timelinePagination = self.getLocalTimeline(minId)

From 8d9a7fa603e898eff857a35d06fa8690dec29ed4 Mon Sep 17 00:00:00 2001
From: rnsrk <rbrt.nsrk@posteo.de>
Date: Fri, 17 Mar 2023 21:25:44 +0100
Subject: [PATCH 08/10] take the large spacy model

---
 SentiTooter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/SentiTooter.py b/SentiTooter.py
index b5c1d6a..aabc498 100644
--- a/SentiTooter.py
+++ b/SentiTooter.py
@@ -154,7 +154,7 @@ def countWords(concatedToots: str, number: int) -> list:
         list
         List containing tuple of word and word frequency.
     """
-    nlp = spacy.load('en_core_web_md')
+    nlp = spacy.load('en_core_web_lg')
     doc = nlp(concatedToots)
 
     # noun tokens that arent stop words or punctuations

From cafda77e7fdea42ff2577b2a00c2c0d7ac818b11 Mon Sep 17 00:00:00 2001
From: rnsrk <rbrt.nsrk@posteo.de>
Date: Fri, 17 Mar 2023 21:26:14 +0100
Subject: [PATCH 09/10] Updated the README

---
 README.md | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 5440dc1..468a872 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,19 @@
 # Hedonodon
-I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
+## Prerequisites
+Install the dependencies with `python -m pip install -r requirements.txt`.
+Install SpaCys nlp model with `python -m spacy download en_core_web_lg`.
+If the automatic download of the twitter-roberta-base-sentiment model and tokenizer fail, go to the model pages on hugging face (see models section) and download the to the respective folder (cardiffnlp/twitter-roberta-base-sentiment)
 
-More Documentation coming soon!
\ No newline at end of file
+## Purpose
+Hedonodon fetched toots from fedihum.org and calculates the sentiments, sentiment mean and word frequencies of each day, and creates fancy diagrams from the data.
+
+## Motivation
+This tool was created to understand how sentiment analyses and nlp methods works, so it may lacks of proper use of models etc...
+
+## Models
+It uses ["germansentiment"](https://huggingface.co/oliverguhr/german-sentiment-bert) for German toots, ["twitter-roberta-base-sentiment"](https://huggingface.co/cardiffnlp/twitter-roberta-base-sentiment) for
+English toots, and ["vaderSentiment"](https://pypi.org/project/vaderSentiment/) for other languages.
+For the word counts I translate the toots to English with the GoogleTranslator from [deep_translator](https://pypi.org/project/deep-translator/) first and then use spaCy's nlp model ["en_core_web_lg"](https://spacy.io/models/en/) to calculate the word frequencies.
+
+## Weaknesses
+Since some modules do not return sentiment compounds, I have to use the nominal sentiment values (positive, neutral, negative) to calculate the mean of the day, which is statistically not okay (;-_-).
\ No newline at end of file

From 03792f21204df2d17a031a6fcd4ea95ba2423d51 Mon Sep 17 00:00:00 2001
From: rnsrk <rbrt.nsrk@posteo.de>
Date: Fri, 17 Mar 2023 21:29:16 +0100
Subject: [PATCH 10/10] Fixed some typos

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 468a872..58d02b6 100644
--- a/README.md
+++ b/README.md
@@ -2,10 +2,10 @@
 ## Prerequisites
 Install the dependencies with `python -m pip install -r requirements.txt`.
 Install SpaCys nlp model with `python -m spacy download en_core_web_lg`.
-If the automatic download of the twitter-roberta-base-sentiment model and tokenizer fail, go to the model pages on hugging face (see models section) and download the to the respective folder (cardiffnlp/twitter-roberta-base-sentiment)
+If the automatic download of the twitter-roberta-base-sentiment model and tokenizer fails, go to the model pages on Hugging Face (see models section) and download them to the respective folder (cardiffnlp/twitter-roberta-base-sentiment).
 
 ## Purpose
-Hedonodon fetched toots from fedihum.org and calculates the sentiments, sentiment mean and word frequencies of each day, and creates fancy diagrams from the data.
+Hedonodon fetches toots from fedihum.org and calculates the sentiments, sentiment mean and word frequencies of each day, and creates fancy diagrams from the data.
 
 ## Motivation
 This tool was created to understand how sentiment analyses and nlp methods works, so it may lacks of proper use of models etc...