first commit
This commit is contained in:
commit
52145ecabf
10 changed files with 538 additions and 0 deletions
10
.gitignore
vendored
Normal file
10
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
kram
|
||||||
|
docs
|
||||||
|
schemas
|
||||||
|
test-docs
|
||||||
|
test-schemas
|
||||||
|
database.db
|
||||||
|
test.db
|
||||||
|
venv
|
||||||
|
.idea
|
||||||
|
__pycache__
|
||||||
35
README.md
Normal file
35
README.md
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
# Good Bye HIDA
|
||||||
|
Small script to transform XML Documents of the HIDA/MIDAS architecture to a sqlite database.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
create a virtual environment:
|
||||||
|
```bash
|
||||||
|
python3 -m venv venv
|
||||||
|
```
|
||||||
|
activate the virtual environment:
|
||||||
|
```bash
|
||||||
|
source venv/bin/activate
|
||||||
|
```
|
||||||
|
install requirements:
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
Place the XML files in the `docs` folder or, for evaluation purposes, a few files in the `test-docs` folder.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
To have a test run, place XML-files in a dir named `test-docs`, then type
|
||||||
|
```bash
|
||||||
|
python3 goodByeHida.py --buildSchemas True
|
||||||
|
```
|
||||||
|
You will get a dir `test-schemas` and a sqlite database `test.db` with the imported data.
|
||||||
|
|
||||||
|
If everything looks good you can run the script with the `docs` folder:
|
||||||
|
```bash
|
||||||
|
python3 goodByeHida.py --production True --buildSchemas True
|
||||||
|
```
|
||||||
|
You will get a dir `schemas` and a sqlite database `database.db` with the imported data.
|
||||||
|
|
||||||
|
If you would like to restart the process and delete the database, type:
|
||||||
|
```bash
|
||||||
|
python3 goodByeHida.py --production True --buildSchemas True --dropDb True
|
||||||
|
```
|
||||||
5
__init__.py
Normal file
5
__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
"""Top-Level package for the project."""
|
||||||
|
# __init__.py
|
||||||
|
|
||||||
|
# Name and version of the application. The name previously read "rptodo",
# which does not match this project (README title: "Good Bye HIDA").
__app_name__ = "goodByeHida"
__version__ = "0.1.0"
|
||||||
170
buildSchemas.py
Normal file
170
buildSchemas.py
Normal file
|
|
@ -0,0 +1,170 @@
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from utils import cleanEntityName
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import shutil
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def _mergeEntitySchema(schemaDir: str, entityName: str, columns) -> None:
    """Merge columns into the schema file of an entity, creating it if needed.

    Args:
        schemaDir (str): The path to the directory to store the schemas.
        entityName (str): The cleaned entity name (without the ``c__`` prefix).
        columns (Iterable[str]): The columns to add to the entity schema.
    """
    filePathEntity: str = os.path.join(schemaDir, f"c__{entityName}.json")
    merged: set = set(columns)

    try:
        with open(filePathEntity, 'r', encoding='utf-8') as f:
            # Keep every column collected from previously processed documents.
            merged.update(json.load(f).get("columns", []))
    except FileNotFoundError:
        pass  # First time this entity is seen.

    with open(filePathEntity, 'w', encoding='utf-8') as f:
        # Sorted output keeps the schema files stable between runs.
        json.dump({"name": f"c__{entityName}", "columns": sorted(merged)}, f, ensure_ascii=False)


def processNode(node, schemaDir: str, parentName: str = None) -> set:
    """Collect the columns of a node and write the schema files derived from it.

    Children that have children themselves are processed recursively and get
    their own entity schema. Leaf children carrying a ``txt`` attribute get an
    entity schema of their own plus a relationship table. All other leaf
    children become columns of the current node.

    Args:
        node (Element): The node to process.
        schemaDir (str): The path to the directory to store the schemas.
        parentName (str, optional): The name of the parent node. Defaults to None.

    Returns:
        set: The set of columns of this node (without ``f__uuid``).
    """
    if node.tag == "block":
        # A block is named after its 'txt' attribute and starts without columns.
        key_lbl: str = cleanEntityName(f"{node.get('txt')}")
        columns: set = set()
    else:
        # Any other node is named "<key>_<lbl>" and has a column for its own value.
        key_lbl: str = cleanEntityName(f"{node.get('key')}_{node.get('lbl')}")
        columns: set = {f"f__{key_lbl}"}

    for child in node:
        if len(child) > 0:
            # The child has children: it becomes an entity of its own.
            processNode(child, schemaDir, key_lbl)
            continue

        childName: str = cleanEntityName(f"{child.get('key')}_{child.get('lbl')}")
        if 'txt' in child.attrib:
            # Leaf with a 'txt' attribute: separate entity, linked via a relationship table.
            createRelTable(schemaDir, key_lbl, childName)
            _mergeEntitySchema(schemaDir, childName, {"f__uuid", f"f__{childName}"})
        else:
            # Plain leaf: a column of the current node.
            columns.add(f"f__{childName}")

    if columns and len(node) > 0:
        # Persist the entity schema of this node, always including the uuid column.
        _mergeEntitySchema(schemaDir, key_lbl, columns | {"f__uuid"})

    if parentName:
        # If the node has a parent, create a relationship table.
        createRelTable(schemaDir, parentName, key_lbl)

    return columns
|
||||||
|
|
||||||
|
|
||||||
|
def processXmlFile(filePath, schemaDir):
    """Process an XML file and write the schema of every block it contains.

    The schema file and table name are derived from the cleaned ``txt``
    attribute of the block, matching the names used by ``processNode`` and by
    the importer.

    Args:
        filePath (str): The path to the XML file.
        schemaDir (str): The path to the directory to store the schemas.
    """
    root = ET.parse(filePath).getroot()  # The root of the XML tree.

    os.makedirs(schemaDir, exist_ok=True)

    for block in root.iter('block'):
        if 'txt' not in block.attrib:
            # Blocks without a 'txt' attribute have no name and are skipped.
            continue

        blockName: str = cleanEntityName(f"{block.get('txt')}")
        columns: set = set(processNode(block, schemaDir))
        # The importer always writes a uuid for a block row, so the column must exist.
        columns.add("f__uuid")

        schemaPath: str = os.path.join(schemaDir, f"c__{blockName}.json")
        try:
            with open(schemaPath, 'r', encoding='utf-8') as f:
                # Keep the columns collected from previously processed documents.
                columns.update(json.load(f).get("columns", []))
        except FileNotFoundError:
            pass  # First time this block type is seen.

        with open(schemaPath, 'w', encoding='utf-8') as f:
            json.dump({"name": f"c__{blockName}", "columns": sorted(columns)}, f, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
|
def buildSchemas(dirPath, schemaDir):
    """Rebuild the JSON schemas from all XML files below a directory.

    An existing schema directory is deleted first, then every file ending in
    ``.xml`` found while walking ``dirPath`` is handed to ``processXmlFile``.

    Args:
        dirPath (str): The path to the directory containing the XML files.
        schemaDir (str): The path to the directory to store the schemas.
    """
    # Start from scratch: drop schemas of a previous run.
    if os.path.exists(schemaDir):
        shutil.rmtree(schemaDir)

    # Count the XML files up front so the progress bar knows its total.
    totalFiles = 0
    for _root, _dirs, names in os.walk(dirPath):
        for name in names:
            if name.endswith('.xml'):
                totalFiles += 1

    with tqdm(total=totalFiles, desc="Processing XML files", ncols=75) as pbar:
        for currentDir, _subDirs, names in os.walk(dirPath):
            for name in names:
                if not name.endswith('.xml'):
                    continue
                processXmlFile(os.path.join(currentDir, name), schemaDir)
                # One step per processed XML file.
                pbar.update(1)

    print('Schemas built.')
|
||||||
|
|
||||||
|
|
||||||
|
def createRelTable(schemaDir: str, parentName: str, key_lbl: str):
    """Write the schema file of a relationship table between two entities.

    The table is named ``r__<parentName>__<key_lbl>`` and has one uuid column
    for each side of the relationship. An existing file is overwritten.

    Args:
        schemaDir (str): The path to the directory to store the schemas.
        parentName (str): The name of the parent node.
        key_lbl (str): The name of the child node.
    """
    tableName = f"r__{parentName}__{key_lbl}"
    schema = {
        "name": tableName,
        "columns": [f"f__{parentName}__uuid", f"f__{key_lbl}__uuid"],
    }

    targetPath: str = os.path.join(schemaDir, f"{tableName}.json")
    serialized: str = json.dumps(schema, ensure_ascii=False)
    with open(targetPath, 'w', encoding='utf-8') as f:
        f.write(serialized)
|
||||||
64
goodByeHida.py
Normal file
64
goodByeHida.py
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
import argparse
import os

try:
    from distutils.util import strtobool
except ImportError:
    # distutils is no longer part of the standard library as of Python 3.12,
    # so provide an equivalent local implementation.
    def strtobool(val):
        """Convert a string representation of truth to 1 or 0.

        Raises:
            ValueError: If val is not a recognised truth value.
        """
        val = val.lower()
        if val in ('y', 'yes', 't', 'true', 'on', '1'):
            return 1
        if val in ('n', 'no', 'f', 'false', 'off', '0'):
            return 0
        raise ValueError(f"invalid truth value {val!r}")

from buildSchemas import buildSchemas
from importer import Importer
from initDb import initDb

# Create the parser
parser = argparse.ArgumentParser(description="Run the program with specific configurations.")

# Add the arguments
parser.add_argument('--production', type=str, default='False', help='Set to True if you want to parse the docs folder, else if parse test-docs')
parser.add_argument('--buildSchemas', type=str, default='False', help='Set to True to rebuild the JSONs for the database schemas')
# '--deleteDatabase' is accepted as an alias because the README documents that spelling.
parser.add_argument('--dropDb', '--deleteDatabase', dest='dropDb', type=str, default='False', help='Set to True to drop the database to restart from scratch')

# Parse the arguments
args = parser.parse_args()

try:
    _production = bool(strtobool(args.production))
    _buildSchemas = bool(strtobool(args.buildSchemas))
    _dropDb = bool(strtobool(args.dropDb))
except ValueError as err:
    # Report an unparsable flag value as a usage error instead of a traceback.
    parser.error(str(err))

if _production:
    print('Running in production mode.')
    docsDir: str = './docs/'  # The directory containing the XML files.
    schemaDir: str = './schemas/'  # The directory to store the schemas.
else:
    print('Running in test mode.')
    docsDir = './test-docs/'
    schemaDir = './test-schemas/'

if _buildSchemas:
    print('Creating the schema jsons...')
    buildSchemas(docsDir, schemaDir)

if _dropDb:
    # Renew the database
    print('Remove the database...')
    dbName = 'database.db' if _production else 'test.db'
    # initDb creates the database file next to the scripts, so it has to be
    # removed from there and not from the current working directory.
    dbPath = os.path.join(os.path.dirname(os.path.realpath(__file__)), dbName)
    if os.path.exists(dbPath):
        os.remove(dbPath)
        print('Database removed.')
    else:
        print('Database does not exist.')

# Initialize the database
print('Initializing the database...')
engine, metadata = initDb(_production, schemaDir)
if engine is False:
    # initDb returns (False, False) when no schemas could be loaded.
    print('Database initialization failed.')
    raise SystemExit(1)

# Import the data
print('Importing the data...')
importer = Importer(engine, metadata, docsDir)
importer.importData()

print('Finished.')
|
||||||
|
|
||||||
147
importer.py
Normal file
147
importer.py
Normal file
|
|
@ -0,0 +1,147 @@
|
||||||
|
import os
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import pandas as pd
|
||||||
|
import uuid
|
||||||
|
from utils import cleanEntityName, tableExists
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def insertData2Db(engine: Session, tableName: str, columns: dict):
    """Append one row to an existing database table.

    Nothing is written when the table does not exist; a message is printed
    instead.

    Args:
        engine (): The database engine to use; it is handed to ``tableExists``
            and to pandas ``to_sql`` as the connection.
        tableName (str): The name of the table to insert the data into.
        columns (dict): The row to insert, mapping column names to values.
    """
    if tableExists(engine, tableName):
        # A one-element list turns the dict into a single-row dataframe.
        pd.DataFrame([columns]).to_sql(tableName, engine, if_exists='append', index=False)
    else:
        print(f'Table {tableName} does not exist.')
|
||||||
|
|
||||||
|
|
||||||
|
class Importer:
    """Import the XML documents of a directory into the database.

    Table and column names are derived exactly as in the schema builder:
    ``cleanEntityName`` is applied to the block's ``txt`` attribute and to
    the combined ``<key>_<lbl>`` of every other node.
    """

    def __init__(self, engine: Session, metadata: Session, docsDir: str):
        """Store the database handles and the directory to import from.

        Args:
            engine (): The database engine passed on to pandas ``to_sql``.
            metadata (): The metadata object returned by ``initDb``; only stored.
            docsDir (str): The directory containing the XML files.
        """
        self.engine = engine
        self.metadata = metadata
        self.docsDir = docsDir

    def importNode(self, node: ET.Element, parentUuid: str = None, parentKey: str = None):
        """Imports a node from an XML file into the database.

        Children that have children of their own, or a ``txt`` attribute, are
        written as separate entities and linked to this node through a
        relationship table. All other children become columns of the returned row.

        Args:
            node (ET.Element): The node to import.
            parentUuid (str, optional): The UUID of the node itself, used as the
                parent side of its children's relationships. Defaults to None.
            parentKey (str, optional): The cleaned key of the node itself. Defaults to None.

        Returns:
            dict: The row of the node (column name -> value); the caller inserts it.
        """
        data: dict = {'f__uuid': parentUuid} if parentUuid else {}  # The row of this node

        for child in node:
            # Same naming as the schema builder, so the tables and columns exist.
            classKey: str = cleanEntityName(f"{child.get('key')}_{child.get('lbl')}")
            fieldName: str = f"f__{classKey}"  # The name of the field
            hasChildren: bool = len(child) > 0

            if not hasChildren and 'txt' not in child.attrib:
                # Plain leaf: its text becomes a column of the current node.
                if child.text is not None:
                    data[fieldName] = child.text.replace('###{new_line}### ', '\n')
                continue

            # The child is an entity of its own (table c__<classKey>).
            entityUuid: str = str(uuid.uuid4())  # The UUID for the entity
            childData: dict = {"f__uuid": entityUuid}

            if 'txt' in child.attrib:
                childData[fieldName] = child.get('txt')

            if hasChildren:
                # Pass the entity's own uuid and key down so that nested
                # relationships point at this entity instead of at None.
                childData.update(self.importNode(child, entityUuid, classKey))

            # Insert the entity and link it to the current node.
            insertData2Db(self.engine, f"c__{classKey}", childData)
            self.insertRelData(parentUuid, parentKey, entityUuid, classKey)

        return data

    def processXmlFile(self, filePath: str, fileName: str):
        """Processes an XML file and imports the data into the database.

        Args:
            filePath (str): The path to the XML file.
            fileName (str): The name of the XML file.
        """
        try:
            root = ET.parse(filePath).getroot()  # The root of the XML tree.
        except ET.ParseError as e:
            # A single malformed document must not abort the whole import.
            print(f"Could not parse {fileName}: {e}")
            return

        for block in root.iter('block'):
            if 'txt' not in block.attrib:
                # Blocks without a 'txt' attribute have no table to go to.
                continue

            classKey: str = cleanEntityName(f"{block.get('txt')}")  # The key for the class
            blockUuid: str = str(uuid.uuid4())  # The UUID for the block
            data: dict = self.importNode(block, blockUuid, classKey)  # The data to import.
            tableName: str = f"c__{classKey}"  # The name of the table to import the data into.
            try:
                insertData2Db(self.engine, tableName, data)
            except Exception as e:
                # Report the failing block and continue with the next one.
                print(f"An error occurred while inserting data into {tableName}: {e}")

    def importData(self):
        """Imports all XML files in a directory into the database.

        Walks through the directory and processes each file ending in ``.xml``.
        """
        # Collect the files first so the progress bar knows its total.
        xmlFiles: list = []
        for dirpath, _dirnames, filenames in os.walk(self.docsDir):
            for fileName in filenames:
                if fileName.endswith('.xml'):
                    xmlFiles.append((os.path.join(dirpath, fileName), fileName))

        with tqdm(total=len(xmlFiles), desc="Processing XML files", ncols=75) as pbar:
            for filePath, fileName in xmlFiles:
                self.processXmlFile(filePath, fileName)
                pbar.update(1)

        print('Data imported.')

    def insertRelData(self, parentUuid: str, parentKey: str, entityUuid: str, classKey: str):
        """Imports the relationship data into the database.

        Args:
            parentUuid (str): The UUID of the parent entity.
            parentKey (str): The key of the parent entity.
            entityUuid (str): The UUID of the entity.
            classKey (str): The key of the entity.
        """
        if parentUuid is None or parentKey is None:
            # Without a parent there is no relationship to record.
            return

        relationTableName: str = f"r__{parentKey}__{classKey}"  # The name of the relation table
        relRow = {f"f__{parentKey}__uuid": parentUuid,
                  f"f__{classKey}__uuid": entityUuid}  # The row to insert into the relation table
        pd.DataFrame([relRow]).to_sql(relationTableName, self.engine, if_exists='append', index=False)
|
||||||
36
initDb.py
Normal file
36
initDb.py
Normal file
|
|
@ -0,0 +1,36 @@
|
||||||
|
import os
|
||||||
|
from sqlalchemy import create_engine, MetaData
|
||||||
|
from initSchemas import initClassesFromSchemas, Base
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Database Initialization
|
||||||
|
def initDb(_production, schemaDir):
    """Create the SQLite database and all tables defined by the schemas.

    Args:
        _production (bool): Selects 'database.db' when true, else 'test.db'.
        schemaDir (str): The directory containing the schema JSON files.

    Returns:
        tuple: ``(engine, metadata)`` on success, ``(False, False)`` when the
        classes could not be initialized from the schemas.
    """
    print('Initializing the classes from the schemas...')
    schemasLoaded = initClassesFromSchemas(schemaDir)
    if not schemasLoaded:
        print('Cannot initialize database. No schemas found.')
        return (False, False)

    # The database file lives in the same directory as this script.
    scriptDir = os.path.dirname(os.path.realpath(__file__))
    dbPath = os.path.join(scriptDir, 'database.db' if _production else 'test.db')

    engine = create_engine(f'sqlite:///{dbPath}')
    metadata = MetaData()

    # Create every table registered on the declarative base.
    Base.metadata.create_all(engine)

    print('Database initialized.')
    return engine, metadata
|
||||||
59
initSchemas.py
Normal file
59
initSchemas.py
Normal file
|
|
@ -0,0 +1,59 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from sqlalchemy import Column, Integer, String, Table
|
||||||
|
from sqlalchemy.ext.declarative import declarative_base
|
||||||
|
|
||||||
|
Base = declarative_base()
|
||||||
|
|
||||||
|
# Characters that are replaced when deriving table, class, and column names.
_IDENTIFIER_REPLACEMENTS = str.maketrans({
    '-': '_', '.': '_', ' ': '_', '(': '_', ')': '_', '?': '_',
    'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss',
})


def _sanitizeIdentifier(rawName):
    """Lower-case rawName and replace punctuation and German special characters."""
    return rawName.lower().translate(_IDENTIFIER_REPLACEMENTS)


def createClass(name, columns):
    """Create a SQLAlchemy class from a JSON schema.

    Args:
        name (str): The name of the class; it is sanitized and used as both
            class name and table name.
        columns (list): The columns of the class; each becomes a String column
            under its sanitized name.

    Returns:
        SQLAlchemy.Class: The SQLAlchemy class.
    """
    tableName = _sanitizeIdentifier(name)

    attrs = {
        '__tablename__': tableName,
        # Allow a table to be declared again (e.g. two schema files that
        # sanitize to the same name) instead of raising "already defined".
        '__table_args__': {'extend_existing': True},
    }

    hasUuid = False
    for prop in columns:
        # NOTE(review): the schema builder in this project writes the column as
        # 'f__uuid', which does not match this check, so such tables receive
        # the integer 'id' primary key below - confirm whether that is intended.
        isUuid = prop.lower() == 'uuid'
        hasUuid = hasUuid or isUuid
        attrs[_sanitizeIdentifier(prop)] = Column(String, primary_key=True) if isUuid else Column(String)

    # If 'uuid' is not in columns, add 'id' as primary key
    if not hasUuid:
        attrs['id'] = Column(Integer, primary_key=True)

    # Create SQLAlchemy class
    return type(tableName, (Base,), attrs)
|
||||||
|
|
||||||
|
def initClassesFromSchemas(schemaDir):
    """Initialize the classes from the schemas.

    Every ``.json`` file in ``schemaDir`` is loaded and turned into a class
    via ``createClass``; the class is also stored in this module's globals.

    Args:
        schemaDir (str): The directory containing the schema JSON files.

    Returns:
        bool: True when at least one schema was loaded, False when the
        directory is missing or contains no JSON files.
    """
    if not os.path.isdir(schemaDir):
        print('Schema directory does not exist.')
        return False

    # Only JSON files count as schemas; sorting keeps the load order stable.
    schemaList = sorted(fileName for fileName in os.listdir(schemaDir) if fileName.endswith('.json'))

    if not schemaList:
        print('No schemas JSON\'s found.')
        return False

    for fileName in schemaList:
        # The schema files are written as UTF-8 with non-ASCII characters kept,
        # so they must be read as UTF-8 regardless of the platform default.
        with open(os.path.join(schemaDir, fileName), 'r', encoding='utf-8') as f:
            data = json.load(f)
        cls = createClass(data['name'], data['columns'])
        globals()[cls.__name__] = cls  # Add the class to the global namespace

    print('Classes initialized from schemas.')
    return True
|
||||||
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
pandas
|
||||||
|
sqlalchemy
|
||||||
|
tqdm
|
||||||
9
utils.py
Normal file
9
utils.py
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
from sqlalchemy import MetaData, Table
|
||||||
|
|
||||||
|
def cleanEntityName(entityName):
    """Return entityName as a lower-case name safe for tables and columns.

    Punctuation and spaces are replaced by underscores and German umlauts and
    sharp s by their ASCII transliterations.

    Args:
        entityName (str): The raw name.

    Returns:
        str: The cleaned name.
    """
    replacements = {
        '-': '_', '.': '_', ' ': '_', '(': '_', ')': '_', '?': '_',
        'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss',
    }
    # Lower-casing first means upper-case umlauts are transliterated as well.
    return entityName.lower().translate(str.maketrans(replacements))
|
||||||
|
|
||||||
|
def tableExists(engine, table_name):
    """Return whether a table with the given name exists in the database.

    The database is asked about this one table only; reflecting the complete
    schema on every call is unnecessary, and this function is called once per
    inserted row.

    Args:
        engine: The SQLAlchemy engine (or connection) to inspect.
        table_name (str): The name of the table to look for.

    Returns:
        bool: True if the table exists.
    """
    # Local import so the file's top-level import line stays untouched.
    from sqlalchemy import inspect

    return inspect(engine).has_table(table_name)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue