add code documentation
This commit is contained in:
parent
4479bd2429
commit
bc842244c7
7 changed files with 261 additions and 31 deletions
|
|
@ -1,27 +1,87 @@
|
|||
from langdetect import detect
|
||||
import pytz
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import re
|
||||
from SentiTooter import SentiTooter
|
||||
from pprint import pprint
|
||||
|
||||
class TootCrawler():
|
||||
"""Class to fetch the recent toots from fedihum.org."""
|
||||
|
||||
def __init__(self, mastodonInstance) -> None:
|
||||
def __init__(self, mastodonInstance: any) -> None:
|
||||
"""Initialize the Mastodon instance and depending classes.
|
||||
|
||||
Parameters
|
||||
------
|
||||
mastodonInstance: any
|
||||
The initialized Mastodon instance.
|
||||
"""
|
||||
self.mastodonInstance = mastodonInstance
|
||||
self.compilePattern = re.compile('<.*?>')
|
||||
self.sentiTooter = SentiTooter()
|
||||
self.localTimezone = pytz.timezone('Europe/Berlin')
|
||||
|
||||
def getLocalTimeline(self, minId=None):
|
||||
def getLocalTimeline(self, minId=None) -> any:
|
||||
"""Receive the local timeline.
|
||||
|
||||
Parameters
|
||||
------
|
||||
minId: str | None
|
||||
The last fetched toot id from the database.
|
||||
|
||||
Returns
|
||||
------
|
||||
any
|
||||
The local Mastodon timeline from fedihum.org.
|
||||
"""
|
||||
return self.mastodonInstance.timeline_local(min_id=minId, limit=500)
|
||||
|
||||
def cleanhtml(self, raw_html):
|
||||
def cleanhtml(self, raw_html:str) -> str:
|
||||
"""Remove HTML tags and http links from toots.
|
||||
|
||||
Parameters
|
||||
------
|
||||
raw_html: str
|
||||
The toot content.
|
||||
Returns
|
||||
------
|
||||
str
|
||||
The cleaned toot content.
|
||||
"""
|
||||
cleantext = re.sub(self.compilePattern, '', raw_html)
|
||||
cleantext = re.sub(r'http\S+', '', cleantext)
|
||||
return cleantext
|
||||
|
||||
def buildTootsDataframe(self, minId=None):
|
||||
def buildTootsDataframe(self, minId=None) -> DataFrame:
|
||||
"""Parse fetched toots from Mastodon to dataframe.
|
||||
|
||||
Parameters
|
||||
------
|
||||
minId: str | None
|
||||
The id of the last fetched toot.
|
||||
|
||||
Returns
|
||||
------
|
||||
DataFrame
|
||||
A Dataframe containing
|
||||
sentiment: str
|
||||
The sentiment (positive, neutral, negative)
|
||||
model: str
|
||||
The used sentiment model.
|
||||
toot: str
|
||||
The content of the toot.
|
||||
datetime: datetime
|
||||
The datetime of the toot.
|
||||
language: str
|
||||
The language flag of the toot.
|
||||
userName: str
|
||||
The user name of the toot.
|
||||
userId: str
|
||||
The user id.
|
||||
tootId: str
|
||||
The toot id.
|
||||
"""
|
||||
toots = []
|
||||
allTimelineResults = []
|
||||
timelinePagination = self.getLocalTimeline(minId)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue