"""Tabulate the gaps between consecutive timestamps in the Jena climate CSV.

Usage::

    $ time python gap_in_seconds_frequency.py

The script downloads the zipped dataset, reads the first column of every row
as a ``%d.%m.%Y %H:%M:%S`` timestamp and prints a sorted list of
``(gap_in_seconds, occurrences)`` pairs.

A recorded run of the previous revision of this script printed::

    [(600, 420443), (1200, 2), (1800, 1), (8400, 1), (57600, 1), (60600, 1), (63600, 1)]

    real 0m12.454s
    user 0m2.791s
    sys 0m0.055s

That revision measured each gap with ``timedelta.seconds``, which discards
the ``days`` component.  Any gap of a day or more, and any negative gap
(a timestamp earlier than its predecessor), was therefore folded into the
0..86399 range.  This revision uses the full signed duration, so some of the
entries above may come out differently.
"""

import datetime
from collections import Counter
from io import BytesIO, TextIOWrapper
from typing import Iterable, List, Tuple
from zipfile import ZipFile

DATASET_URL = 'https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip'
CSV_NAME = 'jena_climate_2009_2016.csv'
TIMESTAMP_FORMAT = '%d.%m.%Y %H:%M:%S'
DOWNLOAD_TIMEOUT_SECONDS = 60


def count_gaps_in_seconds(lines: Iterable[str], expected_length: int) -> Counter:
    """Count how often each gap between consecutive timestamps occurs.

    Args:
        lines: Comma-separated data rows (without the header) whose first
            field is a timestamp in ``TIMESTAMP_FORMAT``.
        expected_length: Number of comma-separated fields a valid row has.

    Returns:
        A ``Counter`` mapping the signed gap in whole seconds between a row
        and the previous valid row to the number of times that gap occurs.

    Raises:
        ValueError: If a row with the expected field count has a first field
            that does not match ``TIMESTAMP_FORMAT``.
    """
    frequency = Counter()
    previous_time = None
    for line in lines:
        fields = line.split(',')
        if len(fields) != expected_length:
            # Report and skip: parsing a malformed row would either crash in
            # strptime or silently distort the gap counts.
            print('ERROR: ' + line)
            continue
        current_time = datetime.datetime.strptime(fields[0], TIMESTAMP_FORMAT)
        if previous_time is not None:
            # total_seconds() keeps the days component and the sign, unlike
            # the .seconds attribute which is always within 0..86399.
            gap = int((current_time - previous_time).total_seconds())
            frequency[gap] += 1
        previous_time = current_time
    return frequency


def read_dataset(url: str = DATASET_URL, member: str = CSV_NAME) -> Tuple[str, List[str]]:
    """Download the zipped CSV and return ``(header, data_lines)``.

    Args:
        url: Location of the zip archive.
        member: Name of the CSV file inside the archive.

    Returns:
        The header line with its line ending removed, and the list of the
        remaining lines.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Imported here so that count_gaps_in_seconds stays usable without the
    # third-party dependency installed.
    import requests

    response = requests.get(url, timeout=DOWNLOAD_TIMEOUT_SECONDS)
    # Fail with the HTTP error rather than a confusing BadZipFile later on.
    response.raise_for_status()
    with ZipFile(BytesIO(response.content)) as archive:
        with TextIOWrapper(archive.open(member, 'r')) as file:
            header = file.readline().strip('\r\n')
            lines = file.read().splitlines()
    return header, lines


def main() -> None:
    """Download the dataset and print the sorted gap frequencies."""
    header, lines = read_dataset()
    expected_length = len(header.split(','))
    gap_in_seconds_frequency = count_gaps_in_seconds(lines, expected_length)
    print(sorted(gap_in_seconds_frequency.items()))


if __name__ == '__main__':
    main()