Merge branch 'main' into with_cites
This commit is contained in:
commit
09fd313a89
9 changed files with 124 additions and 118 deletions
23
.gitignore
vendored
23
.gitignore
vendored
|
|
@ -1,11 +1,12 @@
|
||||||
database.db
|
database.db
|
||||||
plots
|
plots
|
||||||
instance
|
instance
|
||||||
__pycache__
|
__pycache__
|
||||||
hedonodon_clientcred.secret
|
hedonodon_clientcred.secret
|
||||||
hedonodon_usercred.secret
|
hedonodon_usercred.secret
|
||||||
.fleet
|
.fleet
|
||||||
test.py
|
test.py
|
||||||
.idea
|
.idea
|
||||||
cardiffnlp
|
cardiffnlp
|
||||||
venv
|
venv
|
||||||
|
logs.txt
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from DbSetup import engine, session, databaseUrl
|
from DbSetup import connection, engine, session, databaseUrl
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from sqlalchemy import desc, select
|
from sqlalchemy import desc, select, sql
|
||||||
from Tables import Toots
|
from Tables import Toots
|
||||||
|
|
||||||
def calculateSentimentCount():
|
def calculateSentimentCount():
|
||||||
|
|
@ -11,8 +11,8 @@ def calculateSentimentCount():
|
||||||
HAVING datetime >= DATE("now","-1 day")
|
HAVING datetime >= DATE("now","-1 day")
|
||||||
AND datetime < DATE("now")'''
|
AND datetime < DATE("now")'''
|
||||||
return pd.read_sql(
|
return pd.read_sql(
|
||||||
query,
|
sql.text(query),
|
||||||
databaseUrl,
|
connection,
|
||||||
parse_dates=["datetime"]
|
parse_dates=["datetime"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -51,8 +51,8 @@ class CRUDManager():
|
||||||
print(f'Could not save data to {table}!')
|
print(f'Could not save data to {table}!')
|
||||||
|
|
||||||
def loadFromDatabase(self, table:str, indexColumn=None):
|
def loadFromDatabase(self, table:str, indexColumn=None):
|
||||||
return pd.read_sql_table(table, databaseUrl, index_col=indexColumn)
|
return pd.read_sql_table(table, connection, index_col=indexColumn)
|
||||||
|
|
||||||
def getLastToot(self):
|
def getLastToot(self):
|
||||||
stmt = select(Toots.tootId).order_by(desc('datetime'))
|
stmt = select(Toots.tootId).order_by(desc('datetime'))
|
||||||
return session.scalars(stmt).first()
|
return session.scalars(stmt).first()
|
||||||
|
|
|
||||||
23
DbSetup.py
23
DbSetup.py
|
|
@ -1,11 +1,12 @@
|
||||||
from sqlalchemy import create_engine
|
from sqlalchemy import create_engine
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
from sqlalchemy.ext.declarative import declarative_base
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
|
||||||
databaseUrl = 'sqlite:///database.db'
|
databaseUrl = 'sqlite:///database.db'
|
||||||
engine = create_engine(databaseUrl, future=True)
|
engine = create_engine(databaseUrl, future=True)
|
||||||
session = Session(engine)
|
connection = engine.connect()
|
||||||
Base = declarative_base()
|
session = Session(engine)
|
||||||
|
Base = declarative_base()
|
||||||
def init_db():
|
|
||||||
Base.metadata.create_all(bind=engine)
|
def init_db():
|
||||||
|
Base.metadata.create_all(bind=engine)
|
||||||
|
|
|
||||||
4
Main.py
4
Main.py
|
|
@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
|
||||||
import matplotlib.dates as mdates
|
import matplotlib.dates as mdates
|
||||||
from TootCrawler import TootCrawler
|
from TootCrawler import TootCrawler
|
||||||
|
|
||||||
locale.setlocale(locale.LC_TIME, "en_EN.UTF-8")
|
locale.setlocale(locale.LC_TIME, "en_US.UTF-8")
|
||||||
init_db()
|
init_db()
|
||||||
|
|
||||||
mastodonAccountManager = MastodonAccountManager()
|
mastodonAccountManager = MastodonAccountManager()
|
||||||
|
|
@ -84,7 +84,7 @@ axes[0].legend(pieChartlabels, loc='upper right', bbox_to_anchor=(0.9, 0.9))
|
||||||
# Line chart.
|
# Line chart.
|
||||||
lineChart = dataframe4LineChart.plot.line(
|
lineChart = dataframe4LineChart.plot.line(
|
||||||
ax=axes[1],
|
ax=axes[1],
|
||||||
title='Mean of all sentiments from max positive (1) to min negative (-1)'
|
title='"Mean" of sentiments, calculated from nominal values, pos(1), neu (0), neg (-1)!'
|
||||||
)
|
)
|
||||||
axes[1].grid(True)
|
axes[1].grid(True)
|
||||||
axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
|
axes[1].set_xlim([date(2023, 1, 1), date(2023, 12, 31)])
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
|
|
||||||
class MastodonAccountManager():
|
class MastodonAccountManager():
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
|
self.instance = Mastodon(client_id = 'hedonodon_clientcred.secret', access_token = 'hedonodon_usercred.secret')
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
# Hedonodon
|
# Hedonodon
|
||||||
I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
|
I'm using [vaderSentiment](https://pypi.org/project/vaderSentiment/) to calculate the compounds.
|
||||||
|
|
||||||
More Documentation coming soon!
|
More Documentation coming soon!
|
||||||
62
Tables.py
62
Tables.py
|
|
@ -1,32 +1,32 @@
|
||||||
from DbSetup import Base
|
from DbSetup import Base
|
||||||
from sqlalchemy import Column, Date, Integer, Float, String
|
from sqlalchemy import Column, Date, Integer, Float, String
|
||||||
|
|
||||||
class Toots(Base):
|
class Toots(Base):
|
||||||
__tablename__ = 'Toots'
|
__tablename__ = 'Toots'
|
||||||
__table_args__ = {'extend_existing': True}
|
__table_args__ = {'extend_existing': True}
|
||||||
index = Column(Integer, primary_key=True)
|
index = Column(Integer, primary_key=True)
|
||||||
model = Column(String(30))
|
model = Column(String(30))
|
||||||
datetime = Column(Date)
|
datetime = Column(Date)
|
||||||
language = Column(String(3))
|
language = Column(String(3))
|
||||||
sentiment = Column(String(8))
|
sentiment = Column(String(8))
|
||||||
tootId = Column(String(255))
|
tootId = Column(String(255))
|
||||||
toot = Column(String(600))
|
toot = Column(String(600))
|
||||||
userName = Column(String(255))
|
userName = Column(String(255))
|
||||||
userId = Column(String(255))
|
userId = Column(String(255))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class SentimentCounts(Base):
|
class SentimentCounts(Base):
|
||||||
__tablename__ = 'SentimentCounts'
|
__tablename__ = 'SentimentCounts'
|
||||||
__table_args__ = {'extend_existing': True}
|
__table_args__ = {'extend_existing': True}
|
||||||
index = Column(Integer, primary_key=True)
|
index = Column(Integer, primary_key=True)
|
||||||
sentimentCount = Column(Integer)
|
sentimentCount = Column(Integer)
|
||||||
date = Column(Date, primary_key=True)
|
date = Column(Date, primary_key=True)
|
||||||
sentiment = Column(String(8))
|
sentiment = Column(String(8))
|
||||||
|
|
||||||
class SentimentMeans(Base):
|
class SentimentMeans(Base):
|
||||||
__tablename__ = 'SentimentMeans'
|
__tablename__ = 'SentimentMeans'
|
||||||
__table_args__ = {'extend_existing': True}
|
__table_args__ = {'extend_existing': True}
|
||||||
index = Column(Integer, primary_key=True)
|
index = Column(Integer, primary_key=True)
|
||||||
date = Column(Date, primary_key=True)
|
date = Column(Date, primary_key=True)
|
||||||
SentimentsMean = Column(Float)
|
SentimentsMean = Column(Float)
|
||||||
|
|
@ -1,48 +1,51 @@
|
||||||
from langdetect import detect
|
from langdetect import detect
|
||||||
import pytz
|
import pytz
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import re
|
import re
|
||||||
from SentiTooter import SentiTooter
|
from SentiTooter import SentiTooter
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
class TootCrawler():
|
class TootCrawler():
|
||||||
|
|
||||||
def __init__(self, mastodonInstance) -> None:
|
def __init__(self, mastodonInstance) -> None:
|
||||||
self.mastodonInstance = mastodonInstance
|
self.mastodonInstance = mastodonInstance
|
||||||
self.compilePattern = re.compile('<.*?>')
|
self.compilePattern = re.compile('<.*?>')
|
||||||
self.sentiTooter = SentiTooter()
|
self.sentiTooter = SentiTooter()
|
||||||
self.localTimezone = pytz.timezone('Europe/Berlin')
|
self.localTimezone = pytz.timezone('Europe/Berlin')
|
||||||
|
|
||||||
def getLocalTimeline(self, minId=None):
|
def getLocalTimeline(self, minId=None):
|
||||||
return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
|
return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
|
||||||
|
|
||||||
def cleanhtml(self, raw_html):
|
def cleanhtml(self, raw_html):
|
||||||
cleantext = re.sub(self.compilePattern, '', raw_html)
|
cleantext = re.sub(self.compilePattern, '', raw_html)
|
||||||
cleantext = re.sub(r'http\S+', '', cleantext)
|
cleantext = re.sub(r'http\S+', '', cleantext)
|
||||||
return cleantext
|
return cleantext
|
||||||
|
|
||||||
def buildTootsDataframe(self, minId=None):
|
def buildTootsDataframe(self, minId=None):
|
||||||
toots = []
|
toots = []
|
||||||
allTimelineResults = []
|
allTimelineResults = []
|
||||||
timelinePagination = self.getLocalTimeline(minId)
|
timelinePagination = self.getLocalTimeline(minId)
|
||||||
|
|
||||||
while timelinePagination:
|
while timelinePagination:
|
||||||
allTimelineResults = allTimelineResults + timelinePagination
|
allTimelineResults = allTimelineResults + timelinePagination
|
||||||
timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
|
timelinePagination = self.mastodonInstance.fetch_previous(timelinePagination)
|
||||||
for i in allTimelineResults:
|
for i in allTimelineResults:
|
||||||
content = self.cleanhtml(i.content)
|
content = self.cleanhtml(i.content)
|
||||||
language = detect(content)
|
try:
|
||||||
sentiment = self.sentiTooter.analyze(language, content)
|
language = detect(content)
|
||||||
toot = {
|
except:
|
||||||
"sentiment": sentiment[0],
|
language = None
|
||||||
"model": sentiment[1],
|
sentiment = self.sentiTooter.analyze(language, content)
|
||||||
"toot": content,
|
toot = {
|
||||||
"datetime": i.created_at.astimezone(self.localTimezone),
|
"sentiment": sentiment[0],
|
||||||
"language": language,
|
"model": sentiment[1],
|
||||||
"userName": i.account.display_name,
|
"toot": content,
|
||||||
"userId": i.account.id,
|
"datetime": i.created_at.astimezone(self.localTimezone),
|
||||||
"tootId": i.id
|
"language": language,
|
||||||
}
|
"userName": i.account.display_name,
|
||||||
toots.append(toot)
|
"userId": i.account.id,
|
||||||
toots.sort(key=lambda item:item.get('datetime'))
|
"tootId": i.id
|
||||||
|
}
|
||||||
|
toots.append(toot)
|
||||||
|
toots.sort(key=lambda item:item.get('datetime'))
|
||||||
return pd.DataFrame.from_records(toots)
|
return pd.DataFrame.from_records(toots)
|
||||||
|
|
@ -3,9 +3,10 @@ matplotlib
|
||||||
pandas
|
pandas
|
||||||
sqlalchemy
|
sqlalchemy
|
||||||
vader-multi
|
vader-multi
|
||||||
|
langdetect
|
||||||
numpy
|
numpy
|
||||||
pytz
|
pytz
|
||||||
transformers
|
transformers
|
||||||
langdetect
|
wheel
|
||||||
germansentiment
|
germansentiment
|
||||||
scipy
|
scipy
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue