underway to wordcount

2023-03-15 13:21:44 +01:00 · 2023-03-15 13:21:44 +01:00 · 3b677e5713
commit 3b677e5713
parent fae306916f
4 changed files with 24 additions and 18 deletions
--- a/CRUDManager.py
+++ b/CRUDManager.py
@ -3,7 +3,6 @@ import pandas as pd
 from sqlalchemy import desc, select
 from Tables import Toots

-
 def calculateSentimentCount():
    query = f'''SELECT DATE(datetime) as date, sentiment, COUNT(sentiment) as sentimentCount
                FROM Toots
@ -32,6 +31,17 @@ def calculateSentimentMean(dataframe):
        ]
    )

+def calculateWordCount():
+    """Load yesterday's toots with their date, language and sentiment.
+
+    Selects every row of the Toots table whose ``datetime`` is on or after
+    the database's ``DATE('now', '-1 day')`` and before ``DATE('now')``.
+
+    Returns:
+        A pandas DataFrame with the columns ``date``, ``language``,
+        ``sentiment`` and ``toot``; ``date`` is parsed into datetimes.
+    """
+    # Plain string: the query has no placeholders, so no f-string is needed.
+    # SQL string literals are single-quoted (double quotes denote identifiers).
+    query = '''SELECT DATE(datetime) as date, language, sentiment, toot
+                FROM Toots
+                WHERE datetime >= DATE('now','-1 day')
+                AND datetime < DATE('now')'''
+    return pd.read_sql(
+        query,
+        engine,
+        # The result set exposes the column under its alias "date"; a
+        # "datetime" entry here would match nothing and be silently ignored.
+        parse_dates=["date"],
+    )
+
 class CRUDManager():

    def saveToDatabase(self, dataframe, table:str, useIndex=False):
--- a/Main.py
+++ b/Main.py
@ -1,4 +1,4 @@
-from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean
+from CRUDManager import CRUDManager, calculateSentimentCount, calculateSentimentMean, calculateWordCount
 from datetime import datetime, date
 from DbSetup import init_db
 import locale
@ -25,12 +25,16 @@ crudManager = CRUDManager()

 lastTootId = crudManager.getLastToot()
 tootsDataframe = tootCrawler.buildTootsDataframe(lastTootId)
-exit()
+
 if not tootsDataframe.empty:
    crudManager.saveToDatabase(tootsDataframe, 'Toots', useIndex=False)
 else:
    print('Nothing changed since last database insert!')

+wordCounts = calculateWordCount()
+print(wordCounts);
+print("exit programm")
+exit()
 sentimentsYesterday = calculateSentimentCount()
 sentimentMeansYesterday = calculateSentimentMean(sentimentsYesterday)

--- a/SentiTooter.py
+++ b/SentiTooter.py
@ -41,15 +41,15 @@ class SentiTooter:
                output = self.enModel(**encoded_input)
                scores = output[0][0].detach().numpy()
                scores = softmax(scores)
-                print(scores)
+                #print(scores)
                sentimentIndexWithMaxScore = np.argmax(scores)
                sentimentLabel = self.labels[sentimentIndexWithMaxScore]
                sentiment = [sentimentLabel, 'twitter-roberta-base-sentiment', max(scores)]
-                print(sentiment)
+                #print(sentiment)
                return sentiment
            case _:
                compound = self.sia.polarity_scores(content)['compound']
-                print(self.sia.polarity_scores(content), 'vaderSentiment')
+                #print(self.sia.polarity_scores(content), 'vaderSentiment')
                if compound > (1 / 3):
                    return ['positive', 'vaderSentiment']
                elif compound < (-1 / 3):
@ -58,7 +58,6 @@ class SentiTooter:
                    return ['neutral', 'vaderSentiment']


-
    def initModel(self):
        # PT
        tokenizer = AutoTokenizer.from_pretrained(self.enModelType)
@ -66,13 +65,3 @@ class SentiTooter:
        model = AutoModelForSequenceClassification.from_pretrained(self.enModelType)
        model.save_pretrained(self.enModelType)
        return model, tokenizer
-
-    # # TF
-    # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
-    # model.save_pretrained(MODEL)
-
-    # text = "Good night 😊"
-    # encoded_input = tokenizer(text, return_tensors='tf')
-    # output = model(encoded_input)
-    # scores = output[0][0].numpy()
-    # scores = softmax(scores)
--- a/requirements.txt
+++ b/requirements.txt
@ -5,4 +5,7 @@ sqlalchemy
 vader-multi
 numpy
 pytz
-transformers
+transformers
+langdetect
+germansentiment
+scipy