fixed reproduction and death

This commit is contained in:
rnsrk 2025-04-03 09:47:26 +02:00
parent 34c9b1c0a9
commit d3fe4b50e4
9 changed files with 2831 additions and 6102 deletions

1
.gitignore vendored
View file

@ -9,3 +9,4 @@ venv
wisski_py wisski_py
.idea .idea
__pycache__ __pycache__
logs/*

View file

@ -23,6 +23,8 @@ headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers) api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default') api.pathbuilder = api.get_pathbuilder('default')
test = True
try: try:
processedRows = pd.read_csv(f'./logs/processedArtists.csv') processedRows = pd.read_csv(f'./logs/processedArtists.csv')
except FileNotFoundError: except FileNotFoundError:
@ -34,7 +36,7 @@ artistsTable = pd.read_sql_table('c__kue', con=engine)
artistValues = {} artistValues = {}
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]} digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
imageValues = {} imageValues = {}
imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]} reproNumberAssignmentValues = {'fac4426c096e7f8f44bb0e11b8394952': [str(uuid.uuid4())]}
# Create artists # Create artists
for index, row in artistsTable.iterrows(): for index, row in artistsTable.iterrows():
@ -108,7 +110,7 @@ for index, row in artistsTable.iterrows():
item = item.replace('Epitaphien/', 'epitaphies/') item = item.replace('Epitaphien/', 'epitaphies/')
item = item.replace('Epitaphien\\', 'epitaphies/') item = item.replace('Epitaphien\\', 'epitaphies/')
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image) imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artifact_images/' + item + '.jpg'] # File imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artist_images/' + item + '.jpg'] # File
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
case 'f__6770_rosenb_nr_': case 'f__6770_rosenb_nr_':
artistValues['f82ed1dc96df9230e28e04fef0ff2305'] = value # Rosenberg number artistValues['f82ed1dc96df9230e28e04fef0ff2305'] = value # Rosenberg number
@ -138,12 +140,13 @@ for index, row in artistsTable.iterrows():
# Create Image Assignment entities and add their UUIDs to a list # Create Image Assignment entities and add their UUIDs to a list
# because we link Artist and Image Assignment over the UUID # because we link Artist and Image Assignment over the UUID
if imageList: if imageList:
imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList # List of Image UUIDs reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = imageList # List of Image UUIDs
imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c') reproNumberAssignment = Entity(api=api, fields=reproNumberAssignmentValues, bundle_id='bdc233b242374a41b5e6923eee937fe9')
api.save(imageAssignment) api.save(reproNumberAssignment)
if imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]:
artistValues['fbcc1a8aa38d416e580e0d1c9ff11e58'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]] # Image Assignment if reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'][0]:
artistValues['f42deb039d8d4f47877892af005a1ef9'] = [reproNumberAssignmentValues['fac4426c096e7f8f44bb0e11b8394952'][0]] # Image Assignment
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]: if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
artistValues['f6c2b79f1ba142bb62f83b2c4d805e49'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process artistValues['f6c2b79f1ba142bb62f83b2c4d805e49'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
@ -158,4 +161,9 @@ for index, row in artistsTable.iterrows():
processedRows = processedRows._append({'artistId': artistValues['f61deac361ac5e0731edbf214761d15c'][0], 'uuid': artistValues['fff2eb2283e4cd8df3783602a1bc96ab'][0], 'uri': artist.uri}, ignore_index=True) processedRows = processedRows._append({'artistId': artistValues['f61deac361ac5e0731edbf214761d15c'][0], 'uuid': artistValues['fff2eb2283e4cd8df3783602a1bc96ab'][0], 'uri': artist.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processedArtists.csv', index=False) processedRows.to_csv(f'./logs/processedArtists.csv', index=False)
if test:
print('Testing mode activated. Exiting.')
exit()
print('finish') print('finish')

View file

@ -23,6 +23,7 @@ headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers) api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('default') api.pathbuilder = api.get_pathbuilder('default')
test = True
tableName = "c__3330_todes_dat_" tableName = "c__3330_todes_dat_"
bundleId = 'b487c08016f572b9ecf3f9173339fec3' bundleId = 'b487c08016f572b9ecf3f9173339fec3'
@ -40,9 +41,9 @@ entityValues = {}
# Create entities # Create entities
for index, row in sqlTable.iterrows(): for index, row in sqlTable.iterrows():
# For every row in table... # For every row in table...
if index < len(processedRows) and sqlTable.iloc[index, 'docId'] == processedRows.iloc[index, 'docId']: if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
# skip if already processed # skip if already processed
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}') print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
continue continue
# Create Entity property dicts # Create Entity property dicts
entityValues = {} entityValues = {}
@ -65,7 +66,7 @@ for index, row in sqlTable.iterrows():
docId = value[0] docId = value[0]
case 'f__uuid': case 'f__uuid':
entityValues['f8beb0d372a5cf6f1668c47acf7e53cd'] = value # UUID entityValues['f8beb0d372a5cf6f1668c47acf7e53cd'] = value # UUID
uuid = value[0] fUuid = value[0]
case 'f__3330_todes_dat_': case 'f__3330_todes_dat_':
entityValues['f385a8c323f0a2f49d8eb175e1535b1b'] = value # Death date entityValues['f385a8c323f0a2f49d8eb175e1535b1b'] = value # Death date
case 'f__33ls_lit__stelle': case 'f__33ls_lit__stelle':
@ -95,4 +96,7 @@ for index, row in sqlTable.iterrows():
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True) processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False) processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
if test:
break
print('finish') print('finish')

View file

@ -135,6 +135,7 @@ for index, row in artifactsTable.iterrows():
for item in value: for item in value:
if item is not None: if item is not None:
# Replace dir paths in name # Replace dir paths in name
item = item.replace('Objekte/', 'objects/')
item = item.replace('Objekte\\', 'objects/') item = item.replace('Objekte\\', 'objects/')
item = item.replace('Objekte3\\', 'objects/') item = item.replace('Objekte3\\', 'objects/')
item = item.replace('Objekte4\\', 'objects/') item = item.replace('Objekte4\\', 'objects/')

View file

@ -23,7 +23,7 @@ headers = {"Cache-Control": "no-cache"}
api = Api(api_url, auth, headers) api = Api(api_url, auth, headers)
api.pathbuilder = api.get_pathbuilder('relations') api.pathbuilder = api.get_pathbuilder('relations')
test = True test = False
tableName = "r__kue__3007_bezieh__zu_gs" tableName = "r__kue__3007_bezieh__zu_gs"
bundleId = 'b464b2b43aaa27aaba71e337c9af649c' # Artist to goldsmith relation bundleId = 'b464b2b43aaa27aaba71e337c9af649c' # Artist to goldsmith relation

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff