class CRUDManager():
    """Persistence helper for the toot database.

    All methods work on the module-level ``engine``, ``session`` and
    ``databaseUrl`` objects imported from ``DbSetup`` and on the ``Toots``
    model imported from ``Tables``.
    """

    def saveToDatabase(self, dataframe, table:str, useIndex=False):
        """Append ``dataframe`` to the SQL table ``table``.

        Saving is best effort: a failure is reported on stdout (including the
        reason) and not re-raised, so the caller keeps running.

        Args:
            dataframe: pandas DataFrame to store.
            table: Name of the target table.
            useIndex: Whether the DataFrame index is written as a column.
        """
        try:
            dataframe.to_sql(table, engine, index=useIndex, if_exists="append")
        except Exception as error:
            # ``Exception`` instead of a bare ``except`` so that Ctrl-C and
            # SystemExit are no longer swallowed.
            print(f'Could not save data to {table}! ({error})')

    def loadFromDatabase(self, table:str, indexColumn=None):
        """Return the whole SQL table ``table`` as a pandas DataFrame.

        Args:
            table: Name of the table to read.
            indexColumn: Optional column to use as the DataFrame index.
        """
        return pd.read_sql_table(table, databaseUrl, index_col=indexColumn)

    def getLastToot(self):
        """Return the ``tootId`` of the stored toot with the latest datetime.

        Returns ``None`` when the query yields no row.
        """
        # LIMIT 1: only the newest row is needed.
        stmt = select(Toots.tootId).order_by(desc('datetime')).limit(1)
        return session.scalars(stmt).first()

    @staticmethod
    def _buildAggregateQuery(column, aggregate='Count'):
        """Build the SQL text that aggregates yesterday's toots per day.

        Raises:
            ValueError: If ``column`` or ``aggregate`` is not a plain
                identifier. Both are interpolated into the statement, so
                anything else is rejected.
        """
        for name in (column, aggregate):
            if not isinstance(name, str) or not name.isidentifier():
                raise ValueError(f'Not a valid SQL identifier: {name!r}')

        # Counting is done per distinct value of the column, so that column
        # has to be selected and grouped as well.
        groupColumn = f', {column}' if aggregate == 'Count' else ''

        # The date window is a row filter, hence WHERE (not HAVING), and SQL
        # string literals use single quotes.
        return (
            f'SELECT DATE(datetime) AS date{groupColumn}, '
            f'{aggregate}({column}) AS {column}{aggregate} '
            'FROM Toots '
            "WHERE datetime >= DATE('now', '-1 day') "
            "AND datetime < DATE('now') "
            f'GROUP BY DATE(datetime){groupColumn}'
        )

    def calculateAggregates(self, column, aggregate='Count'):
        """Aggregate ``column`` over yesterday's rows of the ``Toots`` table.

        Args:
            column: Column of ``Toots`` to aggregate.
            aggregate: SQL aggregate function name. With ``'Count'`` the
                result is additionally grouped by ``column``.

        Returns:
            DataFrame with a ``date`` column, the grouped ``column`` (only for
            ``'Count'``) and ``<column><aggregate>`` (e.g. ``sentimentCount``,
            ``compoundAvg``).

        Raises:
            ValueError: If ``column`` or ``aggregate`` is not an identifier.
        """
        query = self._buildAggregateQuery(column, aggregate)
        # No ``parse_dates``: the old value named a column ("datetime") that
        # is not part of the result, so ``date`` was never parsed. It stays
        # unparsed on purpose; Main.py parses it with ``strptime``.
        return pd.read_sql(query, databaseUrl)
"""Crawl new toots, store them, and post yesterday's sentiment charts."""
import locale
import os
from datetime import datetime

import matplotlib.pyplot as plt
from sqlalchemy.sql import desc, select

from CRUDManager import CRUDManager
from DbSetup import init_db
from MastodonAccountManager import MastodonAccountManager
from TootCrawler import TootCrawler

locale.setlocale(locale.LC_TIME, "de_DE.UTF-8")
init_db()

mastodonAccountManager = MastodonAccountManager()
mastodonInstance = mastodonAccountManager.instance
# One-time setup, kept for reference: log in once to create the user
# credential file that MastodonAccountManager reads.
# mastodonInstance.log_in(
#     'USER-EMAIL',
#     'PW',
#     to_file = 'hedonodon_usercred.secret'
# )

tootCrawler = TootCrawler(mastodonInstance)
crudManager = CRUDManager()

# Fetch everything newer than the last stored toot.
lastTootId = crudManager.getLastToot()
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
hasNewToots = not tootsDataframe.empty

# Store the new toots BEFORE aggregating. The aggregates are computed from the
# database, so toots fetched in this run would otherwise be left out.
if hasNewToots:
    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
else:
    print('Nothing changed since last database insert!')

sentimentsYesterday = crudManager.calculateAggregates('sentiment', 'Count')
compoundsYesterday = crudManager.calculateAggregates('compound', 'Avg')

# As before, aggregates are only persisted when this run brought new toots.
if hasNewToots:
    crudManager.saveToDatabase(dataframe=sentimentsYesterday, table='Sentiments', useIndex=True)
    crudManager.saveToDatabase(dataframe=compoundsYesterday, table='Compounds', useIndex=True)

# Without rows for yesterday there is no date to report and nothing to plot.
if sentimentsYesterday.empty:
    raise SystemExit('No toots from yesterday in the database; nothing to plot or post.')

# Despite its name this is the date of the aggregated day (yesterday).
TodayDate = datetime.strptime(sentimentsYesterday['date'].iloc[0], '%Y-%m-%d').strftime('%d.%m.%Y')
dataframe4PieChart = sentimentsYesterday.drop('date', axis=1).set_index('sentiment')
dataframe4LineChart = crudManager.loadFromDatabase('Compounds', 'date').drop('index', axis=1)

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 10))

# Upper chart: share of each sentiment. Colours are looked up per label so a
# day without e.g. negative toots does not shift the colour assignment.
sentimentColors = {'negative': 'red', 'neutral': 'grey', 'positive': 'green'}
pieChartlabels = dataframe4PieChart.index.to_numpy()
pieChart = dataframe4PieChart.plot.pie(
    ax=axes[0],
    y='sentimentCount',
    ylabel="",
    labels=dataframe4PieChart['sentimentCount'],
    title=f'Moods of the toots on {TodayDate} of the local timeline on fedihum.org',
    colors=[sentimentColors.get(label, 'lightgrey') for label in pieChartlabels],
)
# Shift the pie downwards and place the legend beside it.
chartBox = axes[0].get_position()
axes[0].set_position([chartBox.x0, chartBox.y0 - 0.2, chartBox.width, chartBox.height])
axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(1.3, 0.9))

# Lower chart: stored daily compound averages.
lineChart = dataframe4LineChart.plot.line(ax=axes[1], title='Compounds from max positive (1) to min neg (-1)')
axes[1].set_ylim([-1, 1])

# "plots" is git-ignored, so it may not exist on a fresh checkout.
os.makedirs('./plots', exist_ok=True)
plotFileUrl = f'./plots/{TodayDate}.png'
plt.savefig(plotFileUrl)

media = mastodonInstance.media_post(plotFileUrl, mime_type="image/png", description=f"Sentiment analysis of local timeline on fedihum.org, showing the moods of the toots on, and the compounds up to {TodayDate}.")
mastodonInstance.status_post(f'The moods of the toots on and up to {TodayDate}.', media_ids=media, language='en')
class SentiTooter():
    """Classify toots as positive, neutral or negative.

    The class wraps a ``SentimentIntensityAnalyzer`` and turns its
    ``compound`` score into one of three labels.
    """

    def __init__(self, threshold=1/3):
        """Create the analyzer.

        Args:
            threshold: Cut-off for the labels. Scores above ``threshold`` are
                positive, scores below ``-threshold`` are negative, everything
                else (both bounds included) is neutral. Defaults to 1/3.
        """
        self.sia = SentimentIntensityAnalyzer()
        self.threshold = threshold

    def analyze(self, toot):
        """Score the text in ``toot.content``.

        Args:
            toot: Any object with a ``content`` attribute holding the text.

        Returns:
            A two-element list ``[label, compound]`` where ``label`` is
            ``'positive'``, ``'negative'`` or ``'neutral'`` and ``compound``
            is the analyzer's compound score.
        """
        compound = self.sia.polarity_scores(toot.content)['compound']
        if compound > self.threshold:
            label = 'positive'
        elif compound < -self.threshold:
            label = 'negative'
        else:
            label = 'neutral'
        return [label, compound]
class TootCrawler():
    """Fetch toots from the local timeline and build a scored DataFrame."""

    def __init__(self, mastodonInstance) -> None:
        """Store the API client and set up the text and time helpers.

        Args:
            mastodonInstance: Client object providing ``timeline_local``.
        """
        self.mastodonInstance = mastodonInstance
        self.compilePattern = re.compile('<.*?>')
        self.sentiTooter = SentiTooter()
        self.localTimezone = pytz.timezone('Europe/Berlin')

    def getLocalTimeline(self, sinceId=None):
        """Return the local timeline, optionally only toots newer than ``sinceId``."""
        return self.mastodonInstance.timeline_local(since_id=sinceId)

    def cleanhtml(self, raw_html):
        """Return ``raw_html`` as plain text.

        Tags and everything matching ``http\\S+`` are removed, then HTML
        entities (e.g. ``&#39;``) are decoded.
        """
        from html import unescape

        cleantext = self.compilePattern.sub('', raw_html)
        cleantext = re.sub(r'http\S+', '', cleantext)
        # Decode entities last so an escaped "&lt;b&gt;" is kept as text
        # instead of being stripped as a tag.
        return unescape(cleantext)

    def _buildRecord(self, toot):
        """Turn one timeline entry into the row dict stored in the database."""
        from types import SimpleNamespace

        content = self.cleanhtml(toot.content)
        # Score the cleaned text. Passing the raw toot would analyze the HTML
        # markup and links as well; ``analyze`` only reads ``.content``.
        label, compound = self.sentiTooter.analyze(SimpleNamespace(content=content))
        return {
            "sentiment": label,
            "compound": compound,
            "userName": toot.account.display_name,
            "userId": toot.account.id,
            "toot": content,
            "datetime": toot.created_at.astimezone(self.localTimezone),
            "language": toot.language,
            "tootId": toot.id
        }

    def buildTootsDataframe(self, sinceId=None):
        """Return the timeline as a DataFrame, oldest toot first.

        Args:
            sinceId: Optional toot id; passed on to ``getLocalTimeline``.

        Returns:
            DataFrame with the columns ``sentiment``, ``compound``,
            ``userName``, ``userId``, ``toot``, ``datetime``, ``language`` and
            ``tootId``; empty when the timeline has no entries.
        """
        toots = [self._buildRecord(toot) for toot in self.getLocalTimeline(sinceId)]
        toots.sort(key=lambda item: item['datetime'])
        return pd.DataFrame.from_records(toots)