underway to wordcount
This commit is contained in:
parent
fae306916f
commit
3b677e5713
4 changed files with 24 additions and 18 deletions
|
|
@ -3,7 +3,6 @@ import pandas as pd
|
||||||
from sqlalchemy import desc, select
|
from sqlalchemy import desc, select
|
||||||
from Tables import Toots
|
from Tables import Toots
|
||||||
|
|
||||||
|
|
||||||
def calculateSentimentCount():
|
def calculateSentimentCount():
|
||||||
query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
|
query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
|
||||||
FROM Toots
|
FROM Toots
|
||||||
|
|
@ -32,6 +31,17 @@ def calculateSentimentMean(dataframe):
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def calculateWordCount():
|
||||||
|
query = f'''SELECT DATE(datetime) as date, language, sentiment, toot
|
||||||
|
FROM Toots
|
||||||
|
WHERE datetime >= DATE("now","-1 day")
|
||||||
|
AND datetime < DATE("now")'''
|
||||||
|
return pd.read_sql(
|
||||||
|
query,
|
||||||
|
engine,
|
||||||
|
parse_dates=["datetime"]
|
||||||
|
)
|
||||||
|
|
||||||
class CRUDManager():
|
class CRUDManager():
|
||||||
|
|
||||||
def saveToDatabase(self, dataframe, table:str, useIndex=False):
|
def saveToDatabase(self, dataframe, table:str, useIndex=False):
|
||||||
|
|
|
||||||
8
Main.py
8
Main.py
|
|
@ -1,4 +1,4 @@
|
||||||
from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
|
from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean, calculateWordCount
|
||||||
from datetime import datetime, date
|
from datetime import datetime, date
|
||||||
from DbSetup import init_db
|
from DbSetup import init_db
|
||||||
import locale
|
import locale
|
||||||
|
|
@ -25,12 +25,16 @@ crudManager = CRUDManager()
|
||||||
|
|
||||||
lastTootId = crudManager.getLastToot()
|
lastTootId = crudManager.getLastToot()
|
||||||
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
|
tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
|
||||||
exit()
|
|
||||||
if not tootsDataframe.empty:
|
if not tootsDataframe.empty:
|
||||||
crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
|
crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
|
||||||
else:
|
else:
|
||||||
print('Nothing changed since last database insert!')
|
print('Nothing changed since last database insert!')
|
||||||
|
|
||||||
|
wordCounts = calculateWordCount()
|
||||||
|
print(wordCounts);
|
||||||
|
print("exit programm")
|
||||||
|
exit()
|
||||||
sentimentsYesterday = calculateSentimentCount()
|
sentimentsYesterday = calculateSentimentCount()
|
||||||
sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
|
sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -41,15 +41,15 @@ class SentiTooter:
|
||||||
output = self.enModel(**encoded_input)
|
output = self.enModel(**encoded_input)
|
||||||
scores = output[0][0].detach().numpy()
|
scores = output[0][0].detach().numpy()
|
||||||
scores = softmax(scores)
|
scores = softmax(scores)
|
||||||
print(scores)
|
#print(scores)
|
||||||
sentimentIndexWithMaxScore = np.argmax(scores)
|
sentimentIndexWithMaxScore = np.argmax(scores)
|
||||||
sentimentLabel = self.labels[sentimentIndexWithMaxScore]
|
sentimentLabel = self.labels[sentimentIndexWithMaxScore]
|
||||||
sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment', max(scores)]
|
sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment', max(scores)]
|
||||||
print(sentiment)
|
#print(sentiment)
|
||||||
return sentiment
|
return sentiment
|
||||||
case _:
|
case _:
|
||||||
compound = self.sia.polarity_scores(content)['compound']
|
compound = self.sia.polarity_scores(content)['compound']
|
||||||
print(self.sia.polarity_scores(content), 'vaderSentiment')
|
#print(self.sia.polarity_scores(content), 'vaderSentiment')
|
||||||
if compound > (1 / 3):
|
if compound > (1 / 3):
|
||||||
return ['positive', 'vaderSentiment']
|
return ['positive', 'vaderSentiment']
|
||||||
elif compound < (-1 / 3):
|
elif compound < (-1 / 3):
|
||||||
|
|
@ -58,7 +58,6 @@ class SentiTooter:
|
||||||
return ['neutral', 'vaderSentiment']
|
return ['neutral', 'vaderSentiment']
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def initModel(self):
|
def initModel(self):
|
||||||
# PT
|
# PT
|
||||||
tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
|
tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
|
||||||
|
|
@ -66,13 +65,3 @@ class SentiTooter:
|
||||||
model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
|
model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
|
||||||
model.save_pretrained(self.enModelType)
|
model.save_pretrained(self.enModelType)
|
||||||
return model, tokenizer
|
return model, tokenizer
|
||||||
|
|
||||||
# # TF
|
|
||||||
# model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
|
|
||||||
# model.save_pretrained(MODEL)
|
|
||||||
|
|
||||||
# text = "Good night 😊"
|
|
||||||
# encoded_input = tokenizer(text, return_tensors='tf')
|
|
||||||
# output = model(encoded_input)
|
|
||||||
# scores = output[0][0].numpy()
|
|
||||||
# scores = softmax(scores)
|
|
||||||
|
|
|
||||||
|
|
@ -6,3 +6,6 @@ vader-multi
|
||||||
numpy
|
numpy
|
||||||
pytz
|
pytz
|
||||||
transformers
|
transformers
|
||||||
|
langdetect
|
||||||
|
germansentiment
|
||||||
|
scipy
|
||||||
Loading…
Add table
Add a link
Reference in a new issue