fixed reproduction and death
This commit is contained in:
parent
34c9b1c0a9
commit
d3fe4b50e4
9 changed files with 2831 additions and 6102 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -9,3 +9,4 @@ venv
|
||||||
wisski_py
|
wisski_py
|
||||||
.idea
|
.idea
|
||||||
__pycache__
|
__pycache__
|
||||||
|
logs/*
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,8 @@ headers = {"Cache-Control": "no-cache"}
|
||||||
api = Api(api_url, auth, headers)
|
api = Api(api_url, auth, headers)
|
||||||
api.pathbuilder = api.get_pathbuilder('default')
|
api.pathbuilder = api.get_pathbuilder('default')
|
||||||
|
|
||||||
|
test = True
|
||||||
|
|
||||||
try:
|
try:
|
||||||
processedRows = pd.read_csv(f'./logs/processedArtists.csv')
|
processedRows = pd.read_csv(f'./logs/processedArtists.csv')
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
|
|
@ -34,7 +36,7 @@ artistsTable = pd.read_sql_table('c__kue', con=engine)
|
||||||
artistValues = {}
|
artistValues = {}
|
||||||
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
digitisationProcessValues = {'f32274ec0032b8778ba69d20108590cc': [str(uuid.uuid4())]}
|
||||||
imageValues = {}
|
imageValues = {}
|
||||||
imageAssignmentValues = {'f067784f5b1ff850672124a2b05360de': [str(uuid.uuid4())]}
|
reproNumberAssignmentValues = {'fac4426c096e7f8f44bb0e11b8394952': [str(uuid.uuid4())]}
|
||||||
|
|
||||||
# Create artists
|
# Create artists
|
||||||
for index, row in artistsTable.iterrows():
|
for index, row in artistsTable.iterrows():
|
||||||
|
|
@ -108,7 +110,7 @@ for index, row in artistsTable.iterrows():
|
||||||
item = item.replace('Epitaphien/', 'epitaphies/')
|
item = item.replace('Epitaphien/', 'epitaphies/')
|
||||||
item = item.replace('Epitaphien\\', 'epitaphies/')
|
item = item.replace('Epitaphien\\', 'epitaphies/')
|
||||||
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
|
imageValues.setdefault(item, {})['feb10344eaa7a5f414d1e8392853eba9'] = [item] # Reproduction Number (Image)
|
||||||
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artifact_images/' + item + '.jpg'] # File
|
imageValues[item]['fc8d57e55f203c75c2f8a1ae79378ac7'] = ['public://artist_images/' + item + '.jpg'] # File
|
||||||
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
|
imageValues[item]['f11beac4b638016479e6f3fbc7e55d1a'] = [str(uuid.uuid4())] # UUID
|
||||||
case 'f__6770_rosenb_nr_':
|
case 'f__6770_rosenb_nr_':
|
||||||
artistValues['f82ed1dc96df9230e28e04fef0ff2305'] = value # Rosenberg number
|
artistValues['f82ed1dc96df9230e28e04fef0ff2305'] = value # Rosenberg number
|
||||||
|
|
@ -138,12 +140,13 @@ for index, row in artistsTable.iterrows():
|
||||||
# Create Image Assignment entities and add their UUIDs to a list
|
# Create Image Assignment entities and add their UUIDs to a list
|
||||||
# because we link Artist and Image Assignment over the UUID
|
# because we link Artist and Image Assignment over the UUID
|
||||||
if imageList:
|
if imageList:
|
||||||
imageAssignmentValues['f70afb79b45472fee3d02f011caa4b36'] = imageList # List of Image UUIDs
|
reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'] = imageList # List of Image UUIDs
|
||||||
imageAssignment = Entity(api=api, fields=imageAssignmentValues, bundle_id='b88e5d94fb2a83d62df99cf64d6c010c')
|
reproNumberAssignment = Entity(api=api, fields=reproNumberAssignmentValues, bundle_id='bdc233b242374a41b5e6923eee937fe9')
|
||||||
api.save(imageAssignment)
|
api.save(reproNumberAssignment)
|
||||||
|
|
||||||
if imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]:
|
|
||||||
artistValues['fbcc1a8aa38d416e580e0d1c9ff11e58'] = [imageAssignmentValues['f067784f5b1ff850672124a2b05360de'][0]] # Image Assignment
|
if reproNumberAssignmentValues['f2cd4ece6e60bf288b9ae769af08bc44'][0]:
|
||||||
|
artistValues['f42deb039d8d4f47877892af005a1ef9'] = [reproNumberAssignmentValues['fac4426c096e7f8f44bb0e11b8394952'][0]] # Image Assignment
|
||||||
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
|
if digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]:
|
||||||
artistValues['f6c2b79f1ba142bb62f83b2c4d805e49'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
artistValues['f6c2b79f1ba142bb62f83b2c4d805e49'] = [digitisationProcessValues['f32274ec0032b8778ba69d20108590cc'][0]] # Digitisation Process
|
||||||
|
|
||||||
|
|
@ -158,4 +161,9 @@ for index, row in artistsTable.iterrows():
|
||||||
processedRows = processedRows._append({'artistId': artistValues['f61deac361ac5e0731edbf214761d15c'][0], 'uuid': artistValues['fff2eb2283e4cd8df3783602a1bc96ab'][0], 'uri': artist.uri}, ignore_index=True)
|
processedRows = processedRows._append({'artistId': artistValues['f61deac361ac5e0731edbf214761d15c'][0], 'uuid': artistValues['fff2eb2283e4cd8df3783602a1bc96ab'][0], 'uri': artist.uri}, ignore_index=True)
|
||||||
processedRows.to_csv(f'./logs/processedArtists.csv', index=False)
|
processedRows.to_csv(f'./logs/processedArtists.csv', index=False)
|
||||||
|
|
||||||
|
|
||||||
|
if test:
|
||||||
|
print('Testing mode activated. Exiting.')
|
||||||
|
exit()
|
||||||
|
|
||||||
print('finish')
|
print('finish')
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ headers = {"Cache-Control": "no-cache"}
|
||||||
api = Api(api_url, auth, headers)
|
api = Api(api_url, auth, headers)
|
||||||
api.pathbuilder = api.get_pathbuilder('default')
|
api.pathbuilder = api.get_pathbuilder('default')
|
||||||
|
|
||||||
|
test = True
|
||||||
|
|
||||||
tableName = "c__3330_todes_dat_"
|
tableName = "c__3330_todes_dat_"
|
||||||
bundleId = 'b487c08016f572b9ecf3f9173339fec3'
|
bundleId = 'b487c08016f572b9ecf3f9173339fec3'
|
||||||
|
|
@ -40,9 +41,9 @@ entityValues = {}
|
||||||
# Create entities
|
# Create entities
|
||||||
for index, row in sqlTable.iterrows():
|
for index, row in sqlTable.iterrows():
|
||||||
# For every row in table...
|
# For every row in table...
|
||||||
if index < len(processedRows) and sqlTable.iloc[index, 'docId'] == processedRows.iloc[index, 'docId']:
|
if index < len(processedRows) and sqlTable.loc[index, 'id'] == processedRows.loc[index, 'docId']:
|
||||||
# skip if already processed
|
# skip if already processed
|
||||||
print(f'Skipping already processed entity {sqlTable.iloc[index, 0]}')
|
print(f'Skipping already processed entity {sqlTable.loc[index, 'id']}')
|
||||||
continue
|
continue
|
||||||
# Create Entity property dicts
|
# Create Entity property dicts
|
||||||
entityValues = {}
|
entityValues = {}
|
||||||
|
|
@ -65,7 +66,7 @@ for index, row in sqlTable.iterrows():
|
||||||
docId = value[0]
|
docId = value[0]
|
||||||
case 'f__uuid':
|
case 'f__uuid':
|
||||||
entityValues['f8beb0d372a5cf6f1668c47acf7e53cd'] = value # UUID
|
entityValues['f8beb0d372a5cf6f1668c47acf7e53cd'] = value # UUID
|
||||||
uuid = value[0]
|
fUuid = value[0]
|
||||||
case 'f__3330_todes_dat_':
|
case 'f__3330_todes_dat_':
|
||||||
entityValues['f385a8c323f0a2f49d8eb175e1535b1b'] = value # Death date
|
entityValues['f385a8c323f0a2f49d8eb175e1535b1b'] = value # Death date
|
||||||
case 'f__33ls_lit__stelle':
|
case 'f__33ls_lit__stelle':
|
||||||
|
|
@ -95,4 +96,7 @@ for index, row in sqlTable.iterrows():
|
||||||
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
processedRows = processedRows._append({'docId': docId, 'uuid': fUuid, 'uri': entity.uri}, ignore_index=True)
|
||||||
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
processedRows.to_csv(f'./logs/processed-{tableName}.csv', index=False)
|
||||||
|
|
||||||
|
if test:
|
||||||
|
break
|
||||||
|
|
||||||
print('finish')
|
print('finish')
|
||||||
|
|
|
||||||
|
|
@ -135,6 +135,7 @@ for index, row in artifactsTable.iterrows():
|
||||||
for item in value:
|
for item in value:
|
||||||
if item is not None:
|
if item is not None:
|
||||||
# Replace dir paths in name
|
# Replace dir paths in name
|
||||||
|
item = item.replace('Objekte/', 'objects/')
|
||||||
item = item.replace('Objekte\\', 'objects/')
|
item = item.replace('Objekte\\', 'objects/')
|
||||||
item = item.replace('Objekte3\\', 'objects/')
|
item = item.replace('Objekte3\\', 'objects/')
|
||||||
item = item.replace('Objekte4\\', 'objects/')
|
item = item.replace('Objekte4\\', 'objects/')
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ headers = {"Cache-Control": "no-cache"}
|
||||||
api = Api(api_url, auth, headers)
|
api = Api(api_url, auth, headers)
|
||||||
api.pathbuilder = api.get_pathbuilder('relations')
|
api.pathbuilder = api.get_pathbuilder('relations')
|
||||||
|
|
||||||
test = True
|
test = False
|
||||||
|
|
||||||
tableName = "r__kue__3007_bezieh__zu_gs"
|
tableName = "r__kue__3007_bezieh__zu_gs"
|
||||||
bundleId = 'b464b2b43aaa27aaba71e337c9af649c' # Artist to goldsmith relation
|
bundleId = 'b464b2b43aaa27aaba71e337c9af649c' # Artist to goldsmith relation
|
||||||
|
|
|
||||||
3066
logs/delete.csv
3066
logs/delete.csv
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue