underway to wordcount

This commit is contained in:
rnsrk 2023-03-15 13:21:44 +01:00
parent fae306916f
commit 3b677e5713
4 changed files with 24 additions and 18 deletions

View file

@ -3,7 +3,6 @@ import pandas as pd
from sqlalchemy import desc, select from sqlalchemy import desc, select
from Tables import Toots from Tables import Toots
def calculateSentimentCount(): def calculateSentimentCount():
query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
FROM Toots FROM Toots
@ -32,6 +31,17 @@ def calculateSentimentMean(dataframe):
] ]
) )
def calculateWordCount():
    """Fetch the toots of the last one-day window as input for a word count.

    Despite its name this function does not count anything yet: it only
    queries the ``Toots`` table through the module-level ``engine`` and
    returns the matching rows.

    The window is ``DATE('now', '-1 day') <= datetime < DATE('now')``,
    evaluated by the database.

    Returns:
        pandas.DataFrame: one row per toot with the columns ``date``,
        ``language``, ``sentiment`` and ``toot``. ``date`` is parsed into
        a datetime column.
    """
    # SQL string literals are single-quoted. Double-quoted strings are only
    # accepted by SQLite through a legacy fallback that may be disabled.
    query = '''SELECT DATE(datetime) as date, language, sentiment, toot
        FROM Toots
        WHERE datetime >= DATE('now', '-1 day')
        AND datetime < DATE('now')'''
    return pd.read_sql(
        query,
        engine,
        # The result set exposes DATE(datetime) under the alias "date";
        # it has no "datetime" column, so that name would never be parsed.
        parse_dates=["date"],
    )
class CRUDManager(): class CRUDManager():
def saveToDatabase(self, dataframe, table:str, useIndex=False): def saveToDatabase(self, dataframe, table:str, useIndex=False):

View file

@ -1,4 +1,4 @@
from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean, calculateWordCount
from datetime import datetime, date from datetime import datetime, date
from DbSetup import init_db from DbSetup import init_db
import locale import locale
@ -25,12 +25,16 @@ crudManager = CRUDManager()
lastTootId = crudManager.getLastToot() lastTootId = crudManager.getLastToot()
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId) tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
exit()
if not tootsDataframe.empty: if not tootsDataframe.empty:
crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False) crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
else: else:
print('Nothing changed since last database insert!') print('Nothing changed since last database insert!')
wordCounts = calculateWordCount()
print(wordCounts);
print("exit programm")
exit()
sentimentsYesterday = calculateSentimentCount() sentimentsYesterday = calculateSentimentCount()
sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday) sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)

View file

@ -41,15 +41,15 @@ class SentiTooter:
output = self.enModel(**encoded_input) output = self.enModel(**encoded_input)
scores = output[0][0].detach().numpy() scores = output[0][0].detach().numpy()
scores = softmax(scores) scores = softmax(scores)
print(scores) #print(scores)
sentimentIndexWithMaxScore = np.argmax(scores) sentimentIndexWithMaxScore = np.argmax(scores)
sentimentLabel = self.labels[sentimentIndexWithMaxScore] sentimentLabel = self.labels[sentimentIndexWithMaxScore]
sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment', max(scores)] sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment', max(scores)]
print(sentiment) #print(sentiment)
return sentiment return sentiment
case _: case _:
compound = self.sia.polarity_scores(content)['compound'] compound = self.sia.polarity_scores(content)['compound']
print(self.sia.polarity_scores(content), 'vaderSentiment') #print(self.sia.polarity_scores(content), 'vaderSentiment')
if compound > (1 / 3): if compound > (1 / 3):
return ['positive', 'vaderSentiment'] return ['positive', 'vaderSentiment']
elif compound < (-1 / 3): elif compound < (-1 / 3):
@ -58,7 +58,6 @@ class SentiTooter:
return ['neutral', 'vaderSentiment'] return ['neutral', 'vaderSentiment']
def initModel(self): def initModel(self):
# PT # PT
tokenizer = AutoTokenizer.from_pretrained(self.enModelType) tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
@ -66,13 +65,3 @@ class SentiTooter:
model = AutoModelForSequenceClassification.from_pretrained(self.enModelType) model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
model.save_pretrained(self.enModelType) model.save_pretrained(self.enModelType)
return model, tokenizer return model, tokenizer
# # TF
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
# model.save_pretrained(MODEL)
# text = "Good night 😊"
# encoded_input = tokenizer(text, return_tensors='tf')
# output = model(encoded_input)
# scores = output[0][0].numpy()
# scores = softmax(scores)

View file

@ -6,3 +6,6 @@ vader-multi
numpy numpy
pytz pytz
transformers transformers
langdetect
germansentiment
scipy