asigalov61 committed on
Commit a337155 · verified · 1 Parent(s): 4478b59

Upload TMIDIX.py

Files changed (1)
  1. TMIDIX.py +1206 -30
TMIDIX.py CHANGED
@@ -1,6 +1,5 @@
1
  #! /usr/bin/python3
2
 
3
-
4
  r'''###############################################################################
5
  ###################################################################################
6
  #
@@ -8,7 +7,7 @@ r'''############################################################################
8
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
9
  # Version 1.0
10
  #
11
- # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1342
12
  #
13
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
14
  #
@@ -21,19 +20,19 @@ r'''############################################################################
21
  #
22
  ###################################################################################
23
  ###################################################################################
24
- # Copyright 2025 Project Los Angeles / Tegridy Code
25
  #
26
- # Licensed under the Apache License, Version 2.0 (the "License");
27
- # you may not use this file except in compliance with the License.
28
- # You may obtain a copy of the License at
29
  #
30
- # http://www.apache.org/licenses/LICENSE-2.0
31
  #
32
- # Unless required by applicable law or agreed to in writing, software
33
- # distributed under the License is distributed on an "AS IS" BASIS,
34
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35
- # See the License for the specific language governing permissions and
36
- # limitations under the License.
37
  ###################################################################################
38
  ###################################################################################
39
  #
@@ -51,6 +50,7 @@ r'''############################################################################
51
  ###################################################################################'''
52
 
53
  import sys, struct, copy
 
54
  Version = '6.7'
55
  VersionDate = '20201120'
56
 
@@ -1446,8 +1446,9 @@ def _encode(events_lol, unknown_callback=None, never_add_eot=False,
1446
  # pjb.com.au
1447
  #
1448
  # Project Los Angeles
1449
- # Tegridy Code 2021
1450
- # https://github.com/Tegridy-Code/Project-Los-Angeles
 
1451
  #
1452
  ###################################################################################
1453
  ###################################################################################
@@ -1457,8 +1458,6 @@ import os
1457
 
1458
  import datetime
1459
 
1460
- import copy
1461
-
1462
  from datetime import datetime
1463
 
1464
  import secrets
@@ -1475,12 +1474,12 @@ import multiprocessing
1475
 
1476
  from itertools import zip_longest
1477
  from itertools import groupby
 
1478
  from collections import Counter
 
1479
 
1480
  from operator import itemgetter
1481
 
1482
- import sys
1483
-
1484
  from abc import ABC, abstractmethod
1485
 
1486
  from difflib import SequenceMatcher as SM
@@ -1490,6 +1489,14 @@ import math
1490
 
1491
  import matplotlib.pyplot as plt
1492
 
1493
  ###################################################################################
1494
  #
1495
  # Original TMIDI Tegridy helper functions
@@ -4142,15 +4149,16 @@ def tones_chord_to_pitches(tones_chord, base_pitch=60):
4142
  ###################################################################################
4143
 
4144
  def advanced_score_processor(raw_score,
4145
- patches_to_analyze=list(range(129)),
4146
- return_score_analysis=False,
4147
- return_enhanced_score=False,
4148
- return_enhanced_score_notes=False,
4149
- return_enhanced_monophonic_melody=False,
4150
- return_chordified_enhanced_score=False,
4151
- return_chordified_enhanced_score_with_lyrics=False,
4152
- return_score_tones_chords=False,
4153
- return_text_and_lyric_events=False
 
4154
  ):
4155
 
4156
  '''TMIDIX Advanced Score Processor'''
@@ -4179,6 +4187,20 @@ def advanced_score_processor(raw_score,
4179
  basic_single_track_score.append(ev)
4180
  num_tracks += 1
4181
 
4182
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4183
  basic_single_track_score.sort(key=lambda x: x[1])
4184
 
@@ -4193,7 +4215,7 @@ def advanced_score_processor(raw_score,
4193
  enhanced_single_track_score.append(event)
4194
  num_patch_changes += 1
4195
 
4196
- if event[0] == 'note':
4197
  if event[3] != 9:
4198
  event.extend([patches[event[3]]])
4199
  all_score_patches.extend([patches[event[3]]])
@@ -11182,7 +11204,1161 @@ def rle_decode_ones(encoding, size=(128, 128)):
11182
  return matrix
11183
 
11184
  ###################################################################################
11185
- #
11186
  # This is the end of the TMIDI X Python module
11187
- #
11188
  ###################################################################################
 
1
  #! /usr/bin/python3
2
 
 
3
  r'''###############################################################################
4
  ###################################################################################
5
  #
 
7
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
8
  # Version 1.0
9
  #
10
+ # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1438
11
  #
12
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
13
  #
 
20
  #
21
  ###################################################################################
22
  ###################################################################################
23
+ # Copyright 2025 Project Los Angeles / Tegridy Code
24
  #
25
+ # Licensed under the Apache License, Version 2.0 (the "License");
26
+ # you may not use this file except in compliance with the License.
27
+ # You may obtain a copy of the License at
28
  #
29
+ # http://www.apache.org/licenses/LICENSE-2.0
30
  #
31
+ # Unless required by applicable law or agreed to in writing, software
32
+ # distributed under the License is distributed on an "AS IS" BASIS,
33
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
34
+ # See the License for the specific language governing permissions and
35
+ # limitations under the License.
36
  ###################################################################################
37
  ###################################################################################
38
  #
 
50
  ###################################################################################'''
51
 
52
  import sys, struct, copy
53
+
54
  Version = '6.7'
55
  VersionDate = '20201120'
56
 
 
1446
  # pjb.com.au
1447
  #
1448
  # Project Los Angeles
1449
+ # Tegridy Code 2025
1450
+ #
1451
+ # https://github.com/Tegridy-Code/Project-Los-Angeles
1452
  #
1453
  ###################################################################################
1454
  ###################################################################################
 
1458
 
1459
  import datetime
1460
 
 
 
1461
  from datetime import datetime
1462
 
1463
  import secrets
 
1474
 
1475
  from itertools import zip_longest
1476
  from itertools import groupby
1477
+
1478
  from collections import Counter
1479
+ from collections import defaultdict
1480
 
1481
  from operator import itemgetter
1482
 
 
 
1483
  from abc import ABC, abstractmethod
1484
 
1485
  from difflib import SequenceMatcher as SM
 
1489
 
1490
  import matplotlib.pyplot as plt
1491
 
1492
+ import psutil
1493
+
1494
+ import json
1495
+
1496
+ from pathlib import Path
1497
+
1498
+ import shutil
1499
+
1500
  ###################################################################################
1501
  #
1502
  # Original TMIDI Tegridy helper functions
 
4149
  ###################################################################################
4150
 
4151
  def advanced_score_processor(raw_score,
4152
+ patches_to_analyze=list(range(129)),
4153
+ return_score_analysis=False,
4154
+ return_enhanced_score=False,
4155
+ return_enhanced_score_notes=False,
4156
+ return_enhanced_monophonic_melody=False,
4157
+ return_chordified_enhanced_score=False,
4158
+ return_chordified_enhanced_score_with_lyrics=False,
4159
+ return_score_tones_chords=False,
4160
+ return_text_and_lyric_events=False,
4161
+ apply_sustain=False
4162
  ):
4163
 
4164
  '''TMIDIX Advanced Score Processor'''
 
4187
  basic_single_track_score.append(ev)
4188
  num_tracks += 1
4189
 
4190
+ for e in basic_single_track_score:
4191
+
4192
+ if e[0] == 'note':
4193
+ e[3] = e[3] % 16
4194
+ e[4] = e[4] % 128
4195
+ e[5] = e[5] % 128
4196
+
4197
+ if e[0] == 'patch_change':
4198
+ e[2] = e[2] % 16
4199
+ e[3] = e[3] % 128
4200
+
4201
+ if apply_sustain:
4202
+ apply_sustain_to_ms_score([1000, basic_single_track_score])
4203
+
4204
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4205
  basic_single_track_score.sort(key=lambda x: x[1])
4206
 
 
4215
  enhanced_single_track_score.append(event)
4216
  num_patch_changes += 1
4217
 
4218
+ if event[0] == 'note':
4219
  if event[3] != 9:
4220
  event.extend([patches[event[3]]])
4221
  all_score_patches.extend([patches[event[3]]])
 
11204
  return matrix
11205
 
11206
  ###################################################################################
11207
+
11208
+ def vertical_list_search(list_of_lists, trg_list):
11209
+
11210
+ src_list = list_of_lists
11211
+
11212
+ if not src_list or not trg_list:
11213
+ return []
11214
+
11215
+ num_rows = len(src_list)
11216
+ k = len(trg_list)
11217
+
11218
+ row_sets = [set(row) for row in src_list]
11219
+
11220
+ results = []
11221
+
11222
+ for start in range(num_rows - k + 1):
11223
+ valid = True
11224
+
11225
+ for offset, target in enumerate(trg_list):
11226
+
11227
+ if target not in row_sets[start + offset]:
11228
+ valid = False
11229
+ break
11230
+
11231
+ if valid:
11232
+ results.append(list(range(start, start + k)))
11233
+
11234
+ return results
11235
+
11236
+ ###################################################################################
11237
+
11238
+ def smooth_values(values, window_size=3):
11239
+
11240
+ smoothed = []
11241
+
11242
+ for i in range(len(values)):
11243
+
11244
+ start = max(0, i - window_size // 2)
11245
+ end = min(len(values), i + window_size // 2 + 1)
11246
+
11247
+ window = values[start:end]
11248
+
11249
+ smoothed.append(int(sum(window) / len(window)))
11250
+
11251
+ return smoothed
11252
+
11253
+ ###################################################################################
11254
+
11255
+ def is_mostly_wide_peaks_and_valleys(values,
11256
+ min_range=32,
11257
+ threshold=0.7,
11258
+ smoothing_window=5
11259
+ ):
11260
+
11261
+ if not values:
11262
+ return False
11263
+
11264
+ smoothed_values = smooth_values(values, smoothing_window)
11265
+
11266
+ value_range = max(smoothed_values) - min(smoothed_values)
11267
+
11268
+ if value_range < min_range:
11269
+ return False
11270
+
11271
+ if all(v == smoothed_values[0] for v in smoothed_values):
11272
+ return False
11273
+
11274
+ trend_types = []
11275
+
11276
+ for i in range(1, len(smoothed_values)):
11277
+ if smoothed_values[i] > smoothed_values[i - 1]:
11278
+ trend_types.append(1)
11279
+
11280
+ elif smoothed_values[i] < smoothed_values[i - 1]:
11281
+ trend_types.append(-1)
11282
+
11283
+ else:
11284
+ trend_types.append(0)
11285
+
11286
+ trend_count = trend_types.count(1) + trend_types.count(-1)
11287
+
11288
+ proportion = trend_count / len(trend_types)
11289
+
11290
+ return proportion >= threshold
11291
+
11292
+ ###################################################################################
11293
+
11294
+ def system_memory_utilization(return_dict=False):
11295
+
11296
+ if return_dict:
11297
+ return dict(psutil.virtual_memory()._asdict())
11298
+
11299
+ else:
11300
+ print('RAM memory % used:', psutil.virtual_memory()[2])
11301
+ print('RAM Used (GB):', psutil.virtual_memory()[3]/(1024**3))
11302
+
11303
+ ###################################################################################
11304
+
11305
+ def create_files_list(datasets_paths=['./'],
11306
+ files_exts=['.mid', '.midi', '.kar', '.MID', '.MIDI', '.KAR'],
11307
+ randomize_files_list=True,
11308
+ verbose=True
11309
+ ):
11310
+ if verbose:
11311
+ print('=' * 70)
11312
+ print('Searching for files...')
11313
+ print('This may take a while on a large dataset in particular...')
11314
+ print('=' * 70)
11315
+
11316
+ filez_set = defaultdict(None)
11317
+
11318
+ files_exts = tuple(files_exts)
11319
+
11320
+ for dataset_addr in tqdm.tqdm(datasets_paths, disable=not verbose):
11321
+ for dirpath, dirnames, filenames in os.walk(dataset_addr):
11322
+ for file in filenames:
11323
+ if file not in filez_set and file.endswith(files_exts):
11324
+ filez_set[os.path.join(dirpath, file)] = None
11325
+
11326
+ filez = list(filez_set.keys())
11327
+
11328
+ if verbose:
11329
+ print('Done!')
11330
+ print('=' * 70)
11331
+
11332
+ if filez:
11333
+ if randomize_files_list:
11334
+
11335
+ if verbose:
11336
+ print('Randomizing file list...')
11337
+
11338
+ random.shuffle(filez)
11339
+
11340
+ if verbose:
11341
+ print('Done!')
11342
+ print('=' * 70)
11343
+
11344
+ if verbose:
11345
+ print('Found', len(filez), 'files.')
11346
+ print('=' * 70)
11347
+
11348
+ else:
11349
+ if verbose:
11350
+ print('Could not find any files...')
11351
+ print('Please check dataset dirs and files extensions...')
11352
+ print('=' * 70)
11353
+
11354
+ return filez
11355
+
11356
+ ###################################################################################
11357
+
11358
+ def has_consecutive_trend(nums, count):
11359
+
11360
+ if len(nums) < count:
11361
+ return False
11362
+
11363
+ increasing_streak = 1
11364
+ decreasing_streak = 1
11365
+
11366
+ for i in range(1, len(nums)):
11367
+ if nums[i] > nums[i - 1]:
11368
+ increasing_streak += 1
11369
+ decreasing_streak = 1
11370
+
11371
+ elif nums[i] < nums[i - 1]:
11372
+ decreasing_streak += 1
11373
+ increasing_streak = 1
11374
+
11375
+ else:
11376
+ increasing_streak = decreasing_streak = 1
11377
+
11378
+ if increasing_streak == count or decreasing_streak == count:
11379
+ return True
11380
+
11381
+ return False
11382
+
11383
+ ###################################################################################
11384
+
11385
+ def escore_notes_primary_features(escore_notes):
11386
+
11387
+ #=================================================================
11388
+
11389
+ def mean(values):
11390
+ return sum(values) / len(values) if values else None
11391
+
11392
+ def std(values):
11393
+ if not values:
11394
+ return None
11395
+ m = mean(values)
11396
+ return math.sqrt(sum((x - m) ** 2 for x in values) / len(values)) if m is not None else None
11397
+
11398
+ def skew(values):
11399
+ if not values:
11400
+ return None
11401
+ m = mean(values)
11402
+ s = std(values)
11403
+ if s is None or s == 0:
11404
+ return None
11405
+ return sum(((x - m) / s) ** 3 for x in values) / len(values)
11406
+
11407
+ def kurtosis(values):
11408
+ if not values:
11409
+ return None
11410
+ m = mean(values)
11411
+ s = std(values)
11412
+ if s is None or s == 0:
11413
+ return None
11414
+ return sum(((x - m) / s) ** 4 for x in values) / len(values) - 3
11415
+
11416
+ def median(values):
11417
+ if not values:
11418
+ return None
11419
+ srt = sorted(values)
11420
+ n = len(srt)
11421
+ mid = n // 2
11422
+ if n % 2 == 0:
11423
+ return (srt[mid - 1] + srt[mid]) / 2.0
11424
+ return srt[mid]
11425
+
11426
+ def percentile(values, p):
11427
+ if not values:
11428
+ return None
11429
+ srt = sorted(values)
11430
+ n = len(srt)
11431
+ k = (n - 1) * p / 100.0
11432
+ f = int(k)
11433
+ c = k - f
11434
+ if f + 1 < n:
11435
+ return srt[f] * (1 - c) + srt[f + 1] * c
11436
+ return srt[f]
11437
+
11438
+ def diff(values):
11439
+ if not values or len(values) < 2:
11440
+ return []
11441
+ return [values[i + 1] - values[i] for i in range(len(values) - 1)]
11442
+
11443
+ def mad(values):
11444
+ if not values:
11445
+ return None
11446
+ m = median(values)
11447
+ return median([abs(x - m) for x in values])
11448
+
11449
+ def entropy(values):
11450
+ if not values:
11451
+ return None
11452
+ freq = {}
11453
+ for v in values:
11454
+ freq[v] = freq.get(v, 0) + 1
11455
+ total = len(values)
11456
+ ent = 0.0
11457
+ for count in freq.values():
11458
+ p_val = count / total
11459
+ ent -= p_val * math.log2(p_val)
11460
+ return ent
11461
+
11462
+ def mode(values):
11463
+ if not values:
11464
+ return None
11465
+ freq = {}
11466
+ for v in values:
11467
+ freq[v] = freq.get(v, 0) + 1
11468
+ max_count = max(freq.values())
11469
+ modes = [k for k, count in freq.items() if count == max_count]
11470
+ return min(modes)
11471
+
11472
+
11473
+ #=================================================================
11474
+
11475
+ sp_score = solo_piano_escore_notes(escore_notes)
11476
+
11477
+ dscore = delta_score_notes(sp_score)
11478
+
11479
+ seq = []
11480
+
11481
+ for d in dscore:
11482
+ seq.extend([d[1], d[2], d[4]])
11483
+
11484
+ #=================================================================
11485
+
11486
+ n = len(seq)
11487
+ if n % 3 != 0:
11488
+ seq = seq[: n - (n % 3)]
11489
+ arr = [seq[i:i + 3] for i in range(0, len(seq), 3)]
11490
+
11491
+ #=================================================================
11492
+
11493
+ features = {}
11494
+
11495
+ delta_times = [row[0] for row in arr]
11496
+ if delta_times:
11497
+ features['delta_times_mean'] = mean(delta_times)
11498
+ features['delta_times_std'] = std(delta_times)
11499
+ features['delta_times_min'] = min(delta_times)
11500
+ features['delta_times_max'] = max(delta_times)
11501
+ features['delta_times_skew'] = skew(delta_times)
11502
+ features['delta_times_kurtosis'] = kurtosis(delta_times)
11503
+ delta_zero_count = sum(1 for x in delta_times if x == 0)
11504
+ features['delta_times_zero_ratio'] = delta_zero_count / len(delta_times)
11505
+ nonzero_dt = [x for x in delta_times if x != 0]
11506
+ if nonzero_dt:
11507
+ features['delta_times_nonzero_mean'] = mean(nonzero_dt)
11508
+ features['delta_times_nonzero_std'] = std(nonzero_dt)
11509
+ else:
11510
+ features['delta_times_nonzero_mean'] = None
11511
+ features['delta_times_nonzero_std'] = None
11512
+ features['delta_times_mad'] = mad(delta_times)
11513
+ features['delta_times_cv'] = (features['delta_times_std'] / features['delta_times_mean']
11514
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11515
+ features['delta_times_entropy'] = entropy(delta_times)
11516
+ features['delta_times_range'] = max(delta_times) - min(delta_times)
11517
+ features['delta_times_median'] = median(delta_times)
11518
+ features['delta_times_quantile_25'] = percentile(delta_times, 25)
11519
+ features['delta_times_quantile_75'] = percentile(delta_times, 75)
11520
+ if (features['delta_times_quantile_25'] is not None and features['delta_times_quantile_75'] is not None):
11521
+ features['delta_times_iqr'] = features['delta_times_quantile_75'] - features['delta_times_quantile_25']
11522
+ else:
11523
+ features['delta_times_iqr'] = None
11524
+ else:
11525
+ for key in ['delta_times_mean', 'delta_times_std', 'delta_times_min', 'delta_times_max',
11526
+ 'delta_times_skew', 'delta_times_kurtosis', 'delta_times_zero_ratio',
11527
+ 'delta_times_nonzero_mean', 'delta_times_nonzero_std', 'delta_times_mad',
11528
+ 'delta_times_cv', 'delta_times_entropy', 'delta_times_range', 'delta_times_median',
11529
+ 'delta_times_quantile_25', 'delta_times_quantile_75', 'delta_times_iqr']:
11530
+ features[key] = None
11531
+
11532
+ #=================================================================
11533
+
11534
+ durations = [row[1] for row in arr]
11535
+ if durations:
11536
+ features['durations_mean'] = mean(durations)
11537
+ features['durations_std'] = std(durations)
11538
+ features['durations_min'] = min(durations)
11539
+ features['durations_max'] = max(durations)
11540
+ features['durations_skew'] = skew(durations)
11541
+ features['durations_kurtosis'] = kurtosis(durations)
11542
+ features['durations_mad'] = mad(durations)
11543
+ features['durations_cv'] = (features['durations_std'] / features['durations_mean']
11544
+ if features['durations_mean'] and features['durations_mean'] != 0 else None)
11545
+ features['durations_entropy'] = entropy(durations)
11546
+ features['durations_range'] = max(durations) - min(durations)
11547
+ features['durations_median'] = median(durations)
11548
+ features['durations_quantile_25'] = percentile(durations, 25)
11549
+ features['durations_quantile_75'] = percentile(durations, 75)
11550
+ if features['durations_quantile_25'] is not None and features['durations_quantile_75'] is not None:
11551
+ features['durations_iqr'] = features['durations_quantile_75'] - features['durations_quantile_25']
11552
+ else:
11553
+ features['durations_iqr'] = None
11554
+ else:
11555
+ for key in ['durations_mean', 'durations_std', 'durations_min', 'durations_max',
11556
+ 'durations_skew', 'durations_kurtosis', 'durations_mad', 'durations_cv',
11557
+ 'durations_entropy', 'durations_range', 'durations_median', 'durations_quantile_25',
11558
+ 'durations_quantile_75', 'durations_iqr']:
11559
+ features[key] = None
11560
+
11561
+ #=================================================================
11562
+
11563
+ pitches = [row[2] for row in arr]
11564
+ if pitches:
11565
+ features['pitches_mean'] = mean(pitches)
11566
+ features['pitches_std'] = std(pitches)
11567
+ features['pitches_min'] = min(pitches)
11568
+ features['pitches_max'] = max(pitches)
11569
+ features['pitches_skew'] = skew(pitches)
11570
+ features['pitches_kurtosis'] = kurtosis(pitches)
11571
+ features['pitches_range'] = max(pitches) - min(pitches)
11572
+ features['pitches_median'] = median(pitches)
11573
+ features['pitches_quantile_25'] = percentile(pitches, 25)
11574
+ features['pitches_quantile_75'] = percentile(pitches, 75)
11575
+ if len(pitches) > 1:
11576
+ dps = diff(pitches)
11577
+ features['pitches_diff_mean'] = mean(dps)
11578
+ features['pitches_diff_std'] = std(dps)
11579
+ else:
11580
+ features['pitches_diff_mean'] = None
11581
+ features['pitches_diff_std'] = None
11582
+ features['pitches_mad'] = mad(pitches)
11583
+ if len(pitches) > 2:
11584
+ peaks = sum(1 for i in range(1, len(pitches)-1)
11585
+ if pitches[i] > pitches[i-1] and pitches[i] > pitches[i+1])
11586
+ valleys = sum(1 for i in range(1, len(pitches)-1)
11587
+ if pitches[i] < pitches[i-1] and pitches[i] < pitches[i+1])
11588
+ else:
11589
+ peaks, valleys = None, None
11590
+ features['pitches_peak_count'] = peaks
11591
+ features['pitches_valley_count'] = valleys
11592
+ if len(pitches) > 1:
11593
+ x = list(range(len(pitches)))
11594
+ denominator = (len(x) * sum(xi ** 2 for xi in x) - sum(x) ** 2)
11595
+ if denominator != 0:
11596
+ slope = (len(x) * sum(x[i] * pitches[i] for i in range(len(x))) -
11597
+ sum(x) * sum(pitches)) / denominator
11598
+ else:
11599
+ slope = None
11600
+ features['pitches_trend_slope'] = slope
11601
+ else:
11602
+ features['pitches_trend_slope'] = None
11603
+
11604
+ features['pitches_unique_count'] = len(set(pitches))
11605
+ pitch_class_hist = {i: 0 for i in range(12)}
11606
+ for p in pitches:
11607
+ pitch_class_hist[p % 12] += 1
11608
+ total_pitch = len(pitches)
11609
+ for i in range(12):
11610
+ features[f'pitches_pc_{i}'] = (pitch_class_hist[i] / total_pitch) if total_pitch > 0 else None
11611
+
11612
+ max_asc = 0
11613
+ cur_asc = 0
11614
+ max_desc = 0
11615
+ cur_desc = 0
11616
+ for i in range(1, len(pitches)):
11617
+ if pitches[i] > pitches[i-1]:
11618
+ cur_asc += 1
11619
+ max_asc = max(max_asc, cur_asc)
11620
+ cur_desc = 0
11621
+ elif pitches[i] < pitches[i-1]:
11622
+ cur_desc += 1
11623
+ max_desc = max(max_desc, cur_desc)
11624
+ cur_asc = 0
11625
+ else:
11626
+ cur_asc = 0
11627
+ cur_desc = 0
11628
+ features['pitches_max_consecutive_ascending'] = max_asc if pitches else None
11629
+ features['pitches_max_consecutive_descending'] = max_desc if pitches else None
11630
+ p_intervals = diff(pitches)
11631
+ features['pitches_median_diff'] = median(p_intervals) if p_intervals else None
11632
+ if p_intervals:
11633
+ dc = sum(1 for i in range(1, len(p_intervals))
11634
+ if (p_intervals[i] > 0 and p_intervals[i-1] < 0) or (p_intervals[i] < 0 and p_intervals[i-1] > 0))
11635
+ features['pitches_direction_changes'] = dc
11636
+ else:
11637
+ features['pitches_direction_changes'] = None
11638
+ else:
11639
+ for key in (['pitches_mean', 'pitches_std', 'pitches_min', 'pitches_max', 'pitches_skew',
11640
+ 'pitches_kurtosis', 'pitches_range', 'pitches_median', 'pitches_quantile_25',
11641
+ 'pitches_quantile_75', 'pitches_diff_mean', 'pitches_diff_std', 'pitches_mad',
11642
+ 'pitches_peak_count', 'pitches_valley_count', 'pitches_trend_slope',
11643
+ 'pitches_unique_count', 'pitches_max_consecutive_ascending', 'pitches_max_consecutive_descending',
11644
+ 'pitches_median_diff', 'pitches_direction_changes'] +
11645
+ [f'pitches_pc_{i}' for i in range(12)]):
11646
+ features[key] = None
11647
+
11648
+ #=================================================================
11649
+
11650
+ overall = [x for row in arr for x in row]
11651
+ if overall:
11652
+ features['overall_mean'] = mean(overall)
11653
+ features['overall_std'] = std(overall)
11654
+ features['overall_min'] = min(overall)
11655
+ features['overall_max'] = max(overall)
11656
+ features['overall_cv'] = (features['overall_std'] / features['overall_mean']
11657
+ if features['overall_mean'] and features['overall_mean'] != 0 else None)
11658
+ else:
11659
+ for key in ['overall_mean', 'overall_std', 'overall_min', 'overall_max', 'overall_cv']:
11660
+ features[key] = None
11661
+
11662
+ #=================================================================
11663
+
11664
+ onsets = []
11665
+ cumulative = 0
11666
+ for dt in delta_times:
11667
+ onsets.append(cumulative)
11668
+ cumulative += dt
11669
+ if onsets and durations:
11670
+ overall_piece_duration = onsets[-1] + durations[-1]
11671
+ else:
11672
+ overall_piece_duration = None
11673
+ features['overall_piece_duration'] = overall_piece_duration
11674
+ features['overall_notes_density'] = (len(arr) / overall_piece_duration
11675
+ if overall_piece_duration and overall_piece_duration > 0 else None)
11676
+ features['rhythm_ratio'] = (features['durations_mean'] / features['delta_times_mean']
11677
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11678
+ features['overall_sum_delta_times'] = (sum(delta_times) if delta_times else None)
11679
+ features['overall_sum_durations'] = (sum(durations) if durations else None)
11680
+ features['overall_voicing_ratio'] = (sum(durations) / overall_piece_duration
11681
+ if overall_piece_duration and durations else None)
11682
+ features['overall_onset_std'] = std(onsets) if onsets else None
11683
+
11684
+ #=================================================================
11685
+
11686
+ chords_raw = []
11687
+ chords_pc = []
11688
+ current_group = []
11689
+ for i, note in enumerate(arr):
11690
+ dt = note[0]
11691
+ if i == 0:
11692
+ current_group = [i]
11693
+ else:
11694
+ if dt == 0:
11695
+ current_group.append(i)
11696
+ else:
11697
+ if len(current_group) >= 2:
11698
+ chord_notes = [arr[j][2] for j in current_group]
11699
+ chords_raw.append(tuple(sorted(chord_notes)))
11700
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11701
+
11702
+ current_group = [i]
11703
+
11704
+ if current_group and len(current_group) >= 2:
11705
+ chord_notes = [arr[j][2] for j in current_group]
11706
+ chords_raw.append(tuple(sorted(chord_notes)))
11707
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11708
+
11709
+ if chords_raw:
11710
+ chord_count = len(chords_raw)
11711
+ features['chords_count'] = chord_count
11712
+ features['chords_density'] = (chord_count / overall_piece_duration
11713
+ if overall_piece_duration and chord_count is not None else None)
11714
+ chord_sizes = [len(ch) for ch in chords_raw]
11715
+ features['chords_size_mean'] = mean(chord_sizes)
11716
+ features['chords_size_std'] = std(chord_sizes)
11717
+ features['chords_size_min'] = min(chord_sizes) if chord_sizes else None
11718
+ features['chords_size_max'] = max(chord_sizes) if chord_sizes else None
11719
+ features['chords_unique_raw_count'] = len(set(chords_raw))
11720
+ features['chords_unique_pc_count'] = len(set(chords_pc))
11721
+ features['chords_entropy_raw'] = entropy(chords_raw)
11722
+ features['chords_entropy_pc'] = entropy(chords_pc)
11723
+ if len(chords_raw) > 1:
11724
+ rep_raw = sum(1 for i in range(1, len(chords_raw)) if chords_raw[i] == chords_raw[i - 1])
11725
+ features['chords_repeat_ratio_raw'] = rep_raw / (len(chords_raw) - 1)
11726
+ else:
11727
+ features['chords_repeat_ratio_raw'] = None
11728
+ if len(chords_pc) > 1:
11729
+ rep_pc = sum(1 for i in range(1, len(chords_pc)) if chords_pc[i] == chords_pc[i - 1])
11730
+ features['chords_repeat_ratio_pc'] = rep_pc / (len(chords_pc) - 1)
11731
+ else:
11732
+ features['chords_repeat_ratio_pc'] = None
11733
+ if len(chords_raw) > 1:
11734
+ bigrams_raw = [(chords_raw[i], chords_raw[i + 1]) for i in range(len(chords_raw) - 1)]
11735
+ features['chords_bigram_entropy_raw'] = entropy(bigrams_raw)
11736
+ else:
11737
+ features['chords_bigram_entropy_raw'] = None
11738
+ if len(chords_pc) > 1:
11739
+ bigrams_pc = [(chords_pc[i], chords_pc[i + 1]) for i in range(len(chords_pc) - 1)]
11740
+ features['chords_bigram_entropy_pc'] = entropy(bigrams_pc)
11741
+ else:
11742
+ features['chords_bigram_entropy_pc'] = None
11743
+ features['chords_mode_raw'] = mode(chords_raw)
11744
+ features['chords_mode_pc'] = mode(chords_pc)
11745
+ if chords_pc:
11746
+ pc_sizes = [len(ch) for ch in chords_pc]
11747
+ features['chords_pc_size_mean'] = mean(pc_sizes)
11748
+ else:
11749
+ features['chords_pc_size_mean'] = None
11750
+ else:
11751
+ for key in ['chords_count', 'chords_density', 'chords_size_mean', 'chords_size_std',
11752
+ 'chords_size_min', 'chords_size_max', 'chords_unique_raw_count', 'chords_unique_pc_count',
11753
+ 'chords_entropy_raw', 'chords_entropy_pc', 'chords_repeat_ratio_raw', 'chords_repeat_ratio_pc',
11754
+ 'chords_bigram_entropy_raw', 'chords_bigram_entropy_pc', 'chords_mode_raw', 'chords_mode_pc',
11755
+ 'chords_pc_size_mean']:
11756
+ features[key] = None
11757
+
11758
+ #=================================================================
11759
+
11760
+ if delta_times:
11761
+ med_dt = features['delta_times_median']
11762
+ iqr_dt = features['delta_times_iqr']
11763
+ threshold_a = med_dt + 1.5 * iqr_dt if med_dt is not None and iqr_dt is not None else None
11764
+ threshold_b = percentile(delta_times, 90)
11765
+ if threshold_a is not None and threshold_b is not None:
11766
+ phrase_threshold = max(threshold_a, threshold_b)
11767
+ elif threshold_a is not None:
11768
+ phrase_threshold = threshold_a
11769
+ elif threshold_b is not None:
11770
+ phrase_threshold = threshold_b
11771
+ else:
11772
+ phrase_threshold = None
11773
+ else:
11774
+ phrase_threshold = None
11775
+
11776
+ phrases = []
11777
+ current_phrase = []
11778
+ if onsets:
11779
+ current_phrase.append(0)
11780
+ for i in range(len(onsets) - 1):
11781
+ gap = onsets[i + 1] - onsets[i]
11782
+ if phrase_threshold is not None and gap > phrase_threshold:
11783
+ phrases.append(current_phrase)
11784
+ current_phrase = []
11785
+ current_phrase.append(i + 1)
11786
+ if current_phrase:
11787
+ phrases.append(current_phrase)
11788
+ if phrases:
11789
+ phrase_note_counts = []
11790
+ phrase_durations = []
11791
+ phrase_densities = []
11792
+ phrase_mean_pitches = []
11793
+ phrase_pitch_ranges = []
11794
+ phrase_start_times = []
11795
+ phrase_end_times = []
11796
+ for phrase in phrases:
11797
+ note_count = len(phrase)
11798
+ phrase_note_counts.append(note_count)
11799
+ ph_start = onsets[phrase[0]]
11800
+ ph_end = onsets[phrase[-1]] + durations[phrase[-1]]
11801
+ phrase_start_times.append(ph_start)
11802
+ phrase_end_times.append(ph_end)
11803
+ ph_duration = ph_end - ph_start
11804
+ phrase_durations.append(ph_duration)
11805
+ density = note_count / ph_duration if ph_duration > 0 else None
11806
+ phrase_densities.append(density)
11807
+ ph_pitches = [pitches[i] for i in phrase if i < len(pitches)]
11808
+ phrase_mean_pitches.append(mean(ph_pitches) if ph_pitches else None)
11809
+ phrase_pitch_ranges.append((max(ph_pitches) - min(ph_pitches)) if ph_pitches else None)
11810
+ if len(phrases) > 1:
11811
+ phrase_gaps = []
11812
+ for i in range(len(phrases) - 1):
11813
+ gap = phrase_start_times[i + 1] - phrase_end_times[i]
11814
+ phrase_gaps.append(gap if gap > 0 else 0)
11815
+ else:
11816
+ phrase_gaps = []
11817
+ features['phrases_count'] = len(phrases)
11818
+ features['phrases_avg_note_count'] = mean(phrase_note_counts) if phrase_note_counts else None
11819
+ features['phrases_std_note_count'] = std(phrase_note_counts) if phrase_note_counts else None
11820
+ features['phrases_min_note_count'] = min(phrase_note_counts) if phrase_note_counts else None
11821
+ features['phrases_max_note_count'] = max(phrase_note_counts) if phrase_note_counts else None
11822
+ features['phrases_avg_duration'] = mean(phrase_durations) if phrase_durations else None
11823
+ features['phrases_std_duration'] = std(phrase_durations) if phrase_durations else None
11824
+ features['phrases_min_duration'] = min(phrase_durations) if phrase_durations else None
11825
+ features['phrases_max_duration'] = max(phrase_durations) if phrase_durations else None
11826
+ features['phrases_avg_density'] = mean(phrase_densities) if phrase_densities else None
11827
+ features['phrases_std_density'] = std(phrase_densities) if phrase_densities else None
11828
+ features['phrases_avg_mean_pitch'] = mean(phrase_mean_pitches) if phrase_mean_pitches else None
11829
+ features['phrases_avg_pitch_range'] = mean(phrase_pitch_ranges) if phrase_pitch_ranges else None
11830
+ if phrase_gaps:
11831
+ features['phrases_avg_gap'] = mean(phrase_gaps)
11832
+ features['phrases_std_gap'] = std(phrase_gaps)
11833
+ features['phrases_min_gap'] = min(phrase_gaps)
11834
+ features['phrases_max_gap'] = max(phrase_gaps)
11835
+ else:
11836
+ features['phrases_avg_gap'] = None
11837
+ features['phrases_std_gap'] = None
11838
+ features['phrases_min_gap'] = None
11839
+ features['phrases_max_gap'] = None
11840
+ features['phrases_threshold'] = phrase_threshold
11841
+ else:
11842
+ for key in ['phrases_count', 'phrases_avg_note_count', 'phrases_std_note_count',
11843
+ 'phrases_min_note_count', 'phrases_max_note_count', 'phrases_avg_duration',
11844
+ 'phrases_std_duration', 'phrases_min_duration', 'phrases_max_duration',
11845
+ 'phrases_avg_density', 'phrases_std_density', 'phrases_avg_mean_pitch',
11846
+ 'phrases_avg_pitch_range', 'phrases_avg_gap', 'phrases_std_gap',
11847
+ 'phrases_min_gap', 'phrases_max_gap', 'phrases_threshold']:
11848
+ features[key] = None
11849
+
11850
+ #=================================================================
11851
+
11852
+ return features
11853
+
11854
+ ###################################################################################
11855
+
11856
+ def winsorized_normalize(data, new_range=(0, 255), clip=1.5):
11857
+
11858
+ #=================================================================
11859
+
11860
+ new_min, new_max = new_range
11861
+
11862
+ #=================================================================
11863
+
11864
+ def percentile(values, p):
11865
+
11866
+ srt = sorted(values)
11867
+ n = len(srt)
11868
+ if n == 1:
11869
+ return srt[0]
11870
+ k = (n - 1) * p / 100.0
11871
+ f = int(k)
11872
+ c = k - f
11873
+ if f + 1 < n:
11874
+ return srt[f] * (1 - c) + srt[f + 1] * c
11875
+
11876
+ return srt[f]
11877
+
11878
+ #=================================================================
11879
+
11880
+ q1 = percentile(data, 25)
11881
+ q3 = percentile(data, 75)
11882
+ iqr = q3 - q1
11883
+
11884
+ lower_bound_w = q1 - clip * iqr
11885
+ upper_bound_w = q3 + clip * iqr
11886
+
11887
+ data_min = min(data)
11888
+ data_max = max(data)
11889
+ effective_low = max(lower_bound_w, data_min)
11890
+ effective_high = min(upper_bound_w, data_max)
11891
+
11892
+ #=================================================================
11893
+
11894
+ if effective_high == effective_low:
11895
+
11896
+ if data_max == data_min:
11897
+ return [int(new_min)] * len(data)
11898
+
11899
+ normalized = [(x - data_min) / (data_max - data_min) for x in data]
11900
+
11901
+ return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]
11902
+
11903
+ #=================================================================
11904
+
11905
+ clipped = [x if x >= effective_low else effective_low for x in data]
11906
+ clipped = [x if x <= effective_high else effective_high for x in clipped]
11907
+
11908
+ normalized = [(x - effective_low) / (effective_high - effective_low) for x in clipped]
11909
+
11910
+ #=================================================================
11911
+
11912
+ return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]
11913
+
11914
+ ###################################################################################
11915
+
11916
+ def tokenize_features_to_ints_winsorized(features, new_range=(0, 255), clip=1.5, none_token=-1):
11917
+
11918
+ values = []
11919
+ tokens = []
11920
+
11921
+ #=================================================================
11922
+
11923
+ def process_value(val):
11924
+
11925
+ if isinstance(val, (int, float)):
11926
+ return int(round(abs(val)))
11927
+
11928
+ elif isinstance(val, (list, tuple)):
11929
+ return int(round(abs(sum(val) / len(val))))
11930
+
11931
+ else:
11932
+ return int(abs(hash(val)) % (10 ** 8))
11933
+
11934
+ #=================================================================
11935
+
11936
+ for key in sorted(features.keys()):
11937
+
11938
+ value = features[key]
11939
+
11940
+ if value is None:
11941
+ tokens.append(none_token)
11942
+ values.append(none_token)
11943
+
11944
+ else:
11945
+ tokens.append(process_value(value))
11946
+
11947
+ if isinstance(value, (list, tuple)):
11948
+ values.append(sum(value) / len(value))
11949
+
11950
+ else:
11951
+ values.append(value)
11952
+
11953
+ #=================================================================
11954
+
11955
+ norm_tokens = winsorized_normalize(tokens, new_range, clip)
11956
+
11957
+ #=================================================================
11958
+
11959
+ return values, tokens, norm_tokens
11960
+
11961
+ ###################################################################################
11962
+
11963
+ def write_jsonl(records_dicts_list,
11964
+ file_name='data',
11965
+ file_ext='.jsonl',
11966
+ file_mode='w',
11967
+ line_sep='\n',
11968
+ verbose=True
11969
+ ):
11970
+
11971
+ if verbose:
11972
+ print('=' * 70)
11973
+ print('Writing', len(records_dicts_list), 'records to jsonl file...')
11974
+ print('=' * 70)
11975
+
11976
+ if not os.path.splitext(file_name)[1]:
11977
+ file_name += file_ext
11978
+
11979
+ l_count = 0
11980
+
11981
+ with open(file_name, mode=file_mode) as f:
11982
+ for record in tqdm.tqdm(records_dicts_list, disable=not verbose):
11983
+ f.write(json.dumps(record) + line_sep)
11984
+ l_count += 1
11985
+
11986
+ f.close()
11987
+
11988
+ if verbose:
11989
+ print('=' * 70)
11990
+ print('Written total of', l_count, 'jsonl records.')
11991
+ print('=' * 70)
11992
+ print('Done!')
11993
+ print('=' * 70)
11994
+
11995
+ ###################################################################################
11996
+
11997
+ def read_jsonl(file_name='data',
11998
+ file_ext='.jsonl',
11999
+ verbose=True
12000
+ ):
12001
+
12002
+ if verbose:
12003
+ print('=' * 70)
12004
+ print('Reading jsonl file...')
12005
+ print('=' * 70)
12006
+
12007
+ if not os.path.splitext(file_name)[1]:
12008
+ file_name += file_ext
12009
+
12010
+ with open(file_name, 'r') as f:
12011
+
12012
+ records = []
12013
+ gl_count = 0
12014
+
12015
+ for i, line in tqdm.tqdm(enumerate(f), disable=not verbose):
12016
+
12017
+ try:
12018
+ record = json.loads(line)
12019
+ records.append(record)
12020
+ gl_count += 1
12021
+
12022
+ except KeyboardInterrupt:
12023
+ if verbose:
12024
+ print('=' * 70)
12025
+ print('Stopping...')
12026
+ print('=' * 70)
12027
+
12028
+ f.close()
12029
+
12030
+ return records
12031
+
12032
+ except json.JSONDecodeError:
12033
+ if verbose:
12034
+ print('=' * 70)
12035
+ print('[ERROR] Line', i, 'is corrupted! Skipping it...')
12036
+ print('=' * 70)
12037
+
12038
+ continue
12039
+
12040
+ f.close()
12041
+
12042
+ if verbose:
12043
+ print('=' * 70)
12044
+ print('Loaded total of', gl_count, 'jsonl records.')
12045
+ print('=' * 70)
12046
+ print('Done!')
12047
+ print('=' * 70)
12048
+
12049
+ return records
12050
+
12051
+ ###################################################################################
12052
+
12053
+ def read_jsonl_lines(lines_indexes_list,
12054
+ file_name='data',
12055
+ file_ext='.jsonl',
12056
+ verbose=True
12057
+ ):
12058
+
12059
+ if verbose:
12060
+ print('=' * 70)
12061
+ print('Reading jsonl file...')
12062
+ print('=' * 70)
12063
+
12064
+ if not os.path.splitext(file_name)[1]:
12065
+ file_name += file_ext
12066
+
12067
+ records = []
12068
+ l_count = 0
12069
+
12070
+ lines_indexes_list.sort(reverse=True)
12071
+
12072
+ with open(file_name, 'r') as f:
12073
+ for current_line_number, line in tqdm.tqdm(enumerate(f)):
12074
+
12075
+ try:
12076
+ if current_line_number in lines_indexes_list:
12077
+ record = json.loads(line)
12078
+ records.append(record)
12079
+ lines_indexes_list = lines_indexes_list[:-1]
12080
+ l_count += 1
12081
+
12082
+ if not lines_indexes_list:
12083
+ break
12084
+
12085
+ except KeyboardInterrupt:
12086
+ if verbose:
12087
+ print('=' * 70)
12088
+ print('Stopping...')
12089
+ print('=' * 70)
12090
+
12091
+ f.close()
12092
+
12093
+ return records
12094
+
12095
+ except json.JSONDecodeError:
12096
+ if verbose:
12097
+ print('=' * 70)
12098
+ print('[ERROR] Line', current_line_number, 'is corrupted! Skipping it...')
12099
+ print('=' * 70)
12100
+
12101
+ continue
12102
+
12103
+ f.close()
12104
+
12105
+ if verbose:
12106
+ print('=' * 70)
12107
+ print('Loaded total of', l_count, 'jsonl records.')
12108
+ print('=' * 70)
12109
+ print('Done!')
12110
+ print('=' * 70)
12111
+
12112
+ return records
12113
+
12114
+ ###################################################################################
12115
+
12116
+ def compute_base(x: int, n: int) -> int:
12117
+
12118
+ if x < 0:
12119
+ raise ValueError("x must be non-negative.")
12120
+ if x == 0:
12121
+ return 2
12122
+
12123
+ b = max(2, int(x ** (1 / n)))
12124
+
12125
+ if b ** n <= x:
12126
+ b += 1
12127
+
12128
+ return b
12129
+
12130
+ ###################################################################################
12131
+
12132
+ def encode_int_auto(x: int, n: int) -> tuple[int, list[int]]:
12133
+
12134
+ base = compute_base(x, n)
12135
+ digits = [0] * n
12136
+
12137
+ for i in range(n - 1, -1, -1):
12138
+ digits[i] = x % base
12139
+ x //= base
12140
+
12141
+ return base, digits
12142
+
12143
+ ###################################################################################
12144
+
12145
+ def decode_int_auto(base: int, digits: list[int]) -> int:
12146
+
12147
+ x = 0
12148
+ for digit in digits:
12149
+ if digit < 0 or digit >= base:
12150
+ raise ValueError(f"Each digit must be in the range 0 to {base - 1}. Invalid digit: {digit}")
12151
+
12152
+ x = x * base + digit
12153
+
12154
+ return x
12155
+
12156
+ ###################################################################################
12157
+
12158
+ def encode_int_manual(x, base, n):
12159
+
12160
+ digits = [0] * n
12161
+
12162
+ for i in range(n - 1, -1, -1):
12163
+ digits[i] = x % base
12164
+ x //= base
12165
+
12166
+ return digits
12167
+
12168
+ ###################################################################################
12169
+
12170
+ def escore_notes_pitches_chords_signature(escore_notes,
12171
+ max_patch=128,
12172
+ sort_by_counts=False,
12173
+ use_full_chords=False
12174
+ ):
12175
+
12176
+ escore_notes = [e for e in escore_notes if e[6] <= max_patch % 129]
12177
+
12178
+ if escore_notes:
12179
+
12180
+ cscore = chordify_score([1000, escore_notes])
12181
+
12182
+ sig = []
12183
+ dsig = []
12184
+
12185
+ drums_offset = 321 + 128
12186
+
12187
+ bad_chords_counter = 0
12188
+
12189
+ for c in cscore:
12190
+
12191
+ all_pitches = [e[4] if e[3] != 9 else e[4]+128 for e in c]
12192
+ chord = sorted(set(all_pitches))
12193
+
12194
+ pitches = sorted([p for p in chord if p < 128], reverse=True)
12195
+ drums = [(d+drums_offset)-128 for d in chord if d > 127]
12196
+
12197
+ if pitches:
12198
+ if len(pitches) > 1:
12199
+ tones_chord = sorted(set([p % 12 for p in pitches]))
12200
+
12201
+ try:
12202
+ sig_token = ALL_CHORDS_SORTED.index(tones_chord) + 128
12203
+ except:
12204
+ checked_tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=use_full_chords)
12205
+ sig_token = ALL_CHORDS_SORTED.index(checked_tones_chord) + 128
12206
+ bad_chords_counter += 1
12207
+
12208
+ elif len(pitches) == 1:
12209
+ sig_token = pitches[0]
12210
+
12211
+ sig.append(sig_token)
12212
+
12213
+ if drums:
12214
+ dsig.extend(drums)
12215
+
12216
+ sig_p = {}
12217
+
12218
+ for item in sig+dsig:
12219
+
12220
+ if item in sig_p:
12221
+ sig_p[item] += 1
12222
+
12223
+ else:
12224
+ sig_p[item] = 1
12225
+
12226
+ sig_p[-1] = bad_chords_counter
12227
+
12228
+ fsig = [list(v) for v in sig_p.items()]
12229
+
12230
+ if sort_by_counts:
12231
+ fsig.sort(key=lambda x: x[1], reverse=True)
12232
+
12233
+ return fsig
12234
+
12235
+ else:
12236
+ return []
12237
+
12238
+ ###################################################################################
12239
+
12240
+ def compute_sustain_intervals(events):
12241
+
12242
+ intervals = []
12243
+ pedal_on = False
12244
+ current_start = None
12245
+
12246
+ for t, cc in events:
12247
+ if not pedal_on and cc >= 64:
12248
+
12249
+ pedal_on = True
12250
+ current_start = t
12251
+ elif pedal_on and cc < 64:
12252
+
12253
+ pedal_on = False
12254
+ intervals.append((current_start, t))
12255
+ current_start = None
12256
+
12257
+ if pedal_on:
12258
+ intervals.append((current_start, float('inf')))
12259
+
12260
+ merged = []
12261
+
12262
+ for interval in intervals:
12263
+ if merged and interval[0] <= merged[-1][1]:
12264
+ merged[-1] = (merged[-1][0], max(merged[-1][1], interval[1]))
12265
+ else:
12266
+ merged.append(interval)
12267
+ return merged
12268
+
12269
+ ###################################################################################
12270
+
12271
+ def apply_sustain_to_ms_score(score):
12272
+
12273
+ sustain_by_channel = {}
12274
+
12275
+ for track in score[1:]:
12276
+ for event in track:
12277
+ if event[0] == 'control_change' and event[3] == 64:
12278
+ channel = event[2]
12279
+ sustain_by_channel.setdefault(channel, []).append((event[1], event[4]))
12280
+
12281
+ sustain_intervals_by_channel = {}
12282
+
12283
+ for channel, events in sustain_by_channel.items():
12284
+ events.sort(key=lambda x: x[0])
12285
+ sustain_intervals_by_channel[channel] = compute_sustain_intervals(events)
12286
+
12287
+ global_max_off = 0
12288
+
12289
+ for track in score[1:]:
12290
+ for event in track:
12291
+ if event[0] == 'note':
12292
+ global_max_off = max(global_max_off, event[1] + event[2])
12293
+
12294
+ for channel, intervals in sustain_intervals_by_channel.items():
12295
+ updated_intervals = []
12296
+ for start, end in intervals:
12297
+ if end == float('inf'):
12298
+ end = global_max_off
12299
+ updated_intervals.append((start, end))
12300
+ sustain_intervals_by_channel[channel] = updated_intervals
12301
+
12302
+ if sustain_intervals_by_channel:
12303
+
12304
+ for track in score[1:]:
12305
+ for event in track:
12306
+ if event[0] == 'note':
12307
+ start = event[1]
12308
+ nominal_dur = event[2]
12309
+ nominal_off = start + nominal_dur
12310
+ channel = event[3]
12311
+
12312
+ intervals = sustain_intervals_by_channel.get(channel, [])
12313
+ effective_off = nominal_off
12314
+
12315
+ for intv_start, intv_end in intervals:
12316
+ if intv_start < nominal_off < intv_end:
12317
+ effective_off = intv_end
12318
+ break
12319
+
12320
+ effective_dur = effective_off - start
12321
+
12322
+ event[2] = effective_dur
12323
+
12324
+ return score
12325
+
12326
+ ###################################################################################
12327
+
12328
+ def copy_file(src_file: str, trg_dir: str, add_subdir: bool = False, verbose: bool = False):
12329
+
12330
+ src_path = Path(src_file)
12331
+ target_directory = Path(trg_dir)
12332
+
12333
+ if not src_path.is_file():
12334
+ if verbose:
12335
+ print("Source file does not exist or is not a file.")
12336
+
12337
+ return None
12338
+
12339
+ target_directory.mkdir(parents=True, exist_ok=True)
12340
+
12341
+ if add_subdir:
12342
+ first_letter = src_path.name[0]
12343
+ target_directory = target_directory / first_letter
12344
+ target_directory.mkdir(parents=True, exist_ok=True)
12345
+
12346
+ destination = target_directory / src_path.name
12347
+
12348
+ try:
12349
+ shutil.copy2(src_path, destination)
12350
+
12351
+ except:
12352
+ if verbose:
12353
+ print('File could not be copied!')
12354
+
12355
+ return None
12356
+
12357
+ if verbose:
12358
+ print('File copied!')
12359
+
12360
+ return None
12361
+
12362
+ ###################################################################################
12363
  # This is the end of the TMIDI X Python module
 
12364
  ###################################################################################
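
For orientation, here is a minimal usage sketch of two of the pure helper functions this commit adds (encode_int_auto / decode_int_auto and winsorized_normalize). It is illustrative only and not part of the commit; the `import TMIDIX` form and the sample values are assumptions.

# Illustrative sketch, not part of the commit; assumes TMIDIX.py is importable.
import TMIDIX

# Fixed-width base-N integer codec added by this commit: pick the smallest base
# that fits x into n digits, then round-trip it back.
base, digits = TMIDIX.encode_int_auto(100000, n=3)   # base 47, digits [45, 12, 31]
assert TMIDIX.decode_int_auto(base, digits) == 100000

# Winsorized normalization into 0..255 integer tokens: the outlier 400 is clipped
# to the upper IQR bound before scaling, so it lands at the top of the range.
data = [3, 5, 7, 9, 11, 400]
print(TMIDIX.winsorized_normalize(data, new_range=(0, 255), clip=1.5))
# -> [0, 34, 68, 102, 136, 255]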