diff --git a/.gitignore b/.gitignore index d946327..613ca0d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,12 @@ -database.db -plots -instance -__pycache__ -hedonodon_clientcred.secret -hedonodon_usercred.secret -.fleet -test.py -.idea -cardiffnlp -venv \ No newline at end of file +database.db +plots +instance +__pycache__ +hedonodon_clientcred.secret +hedonodon_usercred.secret +.fleet +test.py +.idea +cardiffnlp +venv +logs.txt diff --git a/CRUDManager.py b/CRUDManager.py index e18a575..ea7e7e5 100644 --- a/CRUDManager.py +++ b/CRUDManager.py @@ -1,6 +1,6 @@ -from DbSetup import engine, session, databaseUrl +from DbSetup import connection, engine, session, databaseUrl import pandas as pd -from sqlalchemy import desc, select +from sqlalchemy import desc, select, sql from Tables import Toots def calculateSentimentCount(): @@ -11,8 +11,8 @@ def calculateSentimentCount(): HAVING datetime >= DATE("now","-1 day") AND datetime < DATE("now")''' return pd.read_sql( - query, - databaseUrl, + sql.text(query), + connection, parse_dates=["datetime"] ) @@ -51,8 +51,8 @@ class CRUDManager(): print(f'Could not save data to {table}!') def loadFromDatabase(self, table:str, indexColumn=None): - return pd.read_sql_table(table, databaseUrl, index_col=indexColumn) + return pd.read_sql_table(table, connection, index_col=indexColumn) def getLastToot(self): stmt = select(Toots.tootId).order_by(desc('datetime')) - return session.scalars(stmt).first() \ No newline at end of file + return session.scalars(stmt).first() diff --git a/DbSetup.py b/DbSetup.py index 1898556..c787928 100644 --- a/DbSetup.py +++ b/DbSetup.py @@ -1,11 +1,12 @@ -from sqlalchemy import create_engine -from sqlalchemy.orm import Session -from sqlalchemy.ext.declarative import declarative_base - -databaseUrl = 'sqlite:///database.db' -engine = create_engine(databaseUrl, future=True) -session = Session(engine) -Base = declarative_base() - -def init_db(): - Base.metadata.create_all(bind=engine) +from sqlalchemy import create_engine +from sqlalchemy.orm import Session +from sqlalchemy.ext.declarative import declarative_base + +databaseUrl = 'sqlite:///database.db' +engine = create_engine(databaseUrl, future=True) +connection = engine.connect() +session = Session(engine) +Base = declarative_base() + +def init_db(): + Base.metadata.create_all(bind=engine) diff --git a/Main.py b/Main.py index e43e2ca..56ba6b7 100644 --- a/Main.py +++ b/Main.py @@ -7,7 +7,7 @@ import matplotlib.pyplot as plt import matplotlib.dates as mdates from TootCrawler import TootCrawler -locale.setlocale(locale.LC_TIME, "en_EN.UTF-8") +locale.setlocale(locale.LC_TIME, "en_US.UTF-8") init_db() mastodonAccountManager = MastodonAccountManager() @@ -84,7 +84,7 @@ axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9)) # Line chart. lineChart = dataframe4LineChart.plot.line( ax=axes[1], - title='Mean of all sentiments from max positive (1) to min negative (-1)' + title='"Mean" of sentiments, calculated from nominal values, pos(1), neu (0), neg (-1)!' ) axes[1].grid(True) axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)]) diff --git a/MastodonAccountManager.py b/MastodonAccountManager.py index 9c51e54..68d62bb 100644 --- a/MastodonAccountManager.py +++ b/MastodonAccountManager.py @@ -1,5 +1,5 @@ -from mastodon import Mastodon - -class MastodonAccountManager(): - def __init__(self): - self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret') +from mastodon import Mastodon + +class MastodonAccountManager(): + def __init__(self): + self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret') diff --git a/README.md b/README.md index fdbe2f7..5440dc1 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Hedonodon -I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds. - +# Hedonodon +I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds. + More Documentation coming soon! \ No newline at end of file diff --git a/Tables.py b/Tables.py index 78aa412..c64178a 100644 --- a/Tables.py +++ b/Tables.py @@ -1,32 +1,32 @@ -from DbSetup import Base -from sqlalchemy import Column, Date, Integer, Float, String - -class Toots(Base): - __tablename__ = 'Toots' - __table_args__ = {'extend_existing': True} - index = Column(Integer, primary_key=True) - model = Column(String(30)) - datetime = Column(Date) - language = Column(String(3)) - sentiment = Column(String(8)) - tootId = Column(String(255)) - toot = Column(String(600)) - userName = Column(String(255)) - userId = Column(String(255)) - - - -class SentimentCounts(Base): - __tablename__ = 'SentimentCounts' - __table_args__ = {'extend_existing': True} - index = Column(Integer, primary_key=True) - sentimentCount = Column(Integer) - date = Column(Date, primary_key=True) - sentiment = Column(String(8)) - -class SentimentMeans(Base): - __tablename__ = 'SentimentMeans' - __table_args__ = {'extend_existing': True} - index = Column(Integer, primary_key=True) - date = Column(Date, primary_key=True) +from DbSetup import Base +from sqlalchemy import Column, Date, Integer, Float, String + +class Toots(Base): + __tablename__ = 'Toots' + __table_args__ = {'extend_existing': True} + index = Column(Integer, primary_key=True) + model = Column(String(30)) + datetime = Column(Date) + language = Column(String(3)) + sentiment = Column(String(8)) + tootId = Column(String(255)) + toot = Column(String(600)) + userName = Column(String(255)) + userId = Column(String(255)) + + + +class SentimentCounts(Base): + __tablename__ = 'SentimentCounts' + __table_args__ = {'extend_existing': True} + index = Column(Integer, primary_key=True) + sentimentCount = Column(Integer) + date = Column(Date, primary_key=True) + sentiment = Column(String(8)) + +class SentimentMeans(Base): + __tablename__ = 'SentimentMeans' + __table_args__ = {'extend_existing': True} + index = Column(Integer, primary_key=True) + date = Column(Date, primary_key=True) SentimentsMean = Column(Float) \ No newline at end of file diff --git a/TootCrawler.py b/TootCrawler.py index 1b081c2..a657a5d 100644 --- a/TootCrawler.py +++ b/TootCrawler.py @@ -1,48 +1,51 @@ -from langdetect import detect -import pytz -import pandas as pd -import re -from SentiTooter import SentiTooter -from pprint import pprint - -class TootCrawler(): - - def __init__(self, mastodonInstance) -> None: - self.mastodonInstance = mastodonInstance - self.compilePattern = re.compile('<.*?>') - self.sentiTooter = SentiTooter() - self.localTimezone = pytz.timezone('Europe/Berlin') - - def getLocalTimeline(self, minId=None): - return self.mastodonInstance.timeline_local(min_id=minId, limit=500) - - def cleanhtml(self, raw_html): - cleantext = re.sub(self.compilePattern, '', raw_html) - cleantext = re.sub(r'http\S+', '', cleantext) - return cleantext - - def buildTootsDataframe(self, minId=None): - toots = [] - allTimelineResults = [] - timelinePagination = self.getLocalTimeline(minId) - - while timelinePagination: - allTimelineResults = allTimelineResults + timelinePagination - timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination) - for i in allTimelineResults: - content = self.cleanhtml(i.content) - language = detect(content) - sentiment = self.sentiTooter.analyze(language, content) - toot = { - "sentiment": sentiment[0], - "model": sentiment[1], - "toot": content, - "datetime": i.created_at.astimezone(self.localTimezone), - "language": language, - "userName": i.account.display_name, - "userId": i.account.id, - "tootId": i.id - } - toots.append(toot) - toots.sort(key=lambda item:item.get('datetime')) +from langdetect import detect +import pytz +import pandas as pd +import re +from SentiTooter import SentiTooter +from pprint import pprint + +class TootCrawler(): + + def __init__(self, mastodonInstance) -> None: + self.mastodonInstance = mastodonInstance + self.compilePattern = re.compile('<.*?>') + self.sentiTooter = SentiTooter() + self.localTimezone = pytz.timezone('Europe/Berlin') + + def getLocalTimeline(self, minId=None): + return self.mastodonInstance.timeline_local(min_id=minId, limit=500) + + def cleanhtml(self, raw_html): + cleantext = re.sub(self.compilePattern, '', raw_html) + cleantext = re.sub(r'http\S+', '', cleantext) + return cleantext + + def buildTootsDataframe(self, minId=None): + toots = [] + allTimelineResults = [] + timelinePagination = self.getLocalTimeline(minId) + + while timelinePagination: + allTimelineResults = allTimelineResults + timelinePagination + timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination) + for i in allTimelineResults: + content = self.cleanhtml(i.content) + try: + language = detect(content) + except: + language = None + sentiment = self.sentiTooter.analyze(language, content) + toot = { + "sentiment": sentiment[0], + "model": sentiment[1], + "toot": content, + "datetime": i.created_at.astimezone(self.localTimezone), + "language": language, + "userName": i.account.display_name, + "userId": i.account.id, + "tootId": i.id + } + toots.append(toot) + toots.sort(key=lambda item:item.get('datetime')) return pd.DataFrame.from_records(toots) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index d280535..2cf3aab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,9 +3,10 @@ matplotlib pandas sqlalchemy vader-multi +langdetect numpy pytz transformers -langdetect +wheel germansentiment -scipy \ No newline at end of file +scipy