2023-02-12 15:59:00 +01:00
#!/usr/bin/env python3
2023-02-13 13:12:39 +01:00
from contextlib import nullcontext
2023-02-12 15:59:00 +01:00
import csv
2023-02-12 18:18:30 +01:00
from datetime import date, datetime, time, timedelta
2023-02-12 15:59:00 +01:00
import os
import json
from pytz import timezone
import requests
2023-04-25 11:51:15 +02:00
from time import sleep
2023-02-12 15:59:00 +01:00
2023-02-12 17:22:40 +01:00
import click
2023-02-12 23:09:20 +01:00
from flask import Flask, render_template
2023-02-12 17:22:40 +01:00
from flask.cli import AppGroup
2023-02-12 15:59:00 +01:00
from flask_migrate import Migrate
from flask_sqlalchemy import SQLAlchemy
2023-02-12 17:22:40 +01:00
from sqlalchemy import Boolean, Column, Date, DateTime, Integer, String, Time
2023-02-12 18:18:30 +01:00
from sqlalchemy.sql import func
2023-02-12 15:59:00 +01:00
from tqdm import tqdm
import config
# Flask application object for the whole module.
app = Flask(__name__)
# Click command group named `tgvmax`; the dataset-management commands attach to it.
cli = AppGroup('tgvmax', help="Manage the TGVMax dataset.")
# Merge the project settings from the `config` module into the Flask configuration.
app.config |= config.FLASK_CONFIG
# SQLAlchemy handle used by the models and by every query in this module.
db = SQLAlchemy(app)
# Hook Flask-Migrate onto the application and its database.
Migrate(app, db)
class Train(db.Model):
    """
    One train of the TGVMax dataset: a train number on a given day,
    between one origin station and one destination station.
    """
    __tablename__ = 'train'

    # Presumably the composite string built by parse_trains()
    # ("<number>-<day>-<orig_iata>-<dest_iata>") -- TODO confirm.
    id = Column(String, primary_key=True)
    # Day the train runs, and its train number.
    day = Column(Date, index=True)
    number = Column(Integer, index=True)
    entity = Column(String(16))
    axe = Column(String(32), index=True)
    # Station codes of the origin and the destination.
    orig_iata = Column(String(5), index=True)
    dest_iata = Column(String(5), index=True)
    # Station names of the origin and the destination.
    orig = Column(String(32))
    dest = Column(String(32))
    # Departure and arrival times (time of day only, no date).
    dep = Column(Time)
    arr = Column(Time)
    # Whether the train is flagged as available for TGVMax.
    tgvmax = Column(Boolean, index=True)
    # Seat count reported by the simulator; -1 is the default/reset value.
    remaining_seats = Column(Integer, default=-1)
    # Response and expiration times copied from the simulator request that
    # last refreshed this row.
    last_modification = Column(DateTime)
    expiration_time = Column(DateTime)
2023-02-12 15:59:00 +01:00
2023-02-12 18:18:30 +01:00
class RouteQueue(db.Model):
    """
    A request to the TGVMax simulator for one route on one day.

    A row with ``response_time`` still NULL is waiting to be processed.
    """
    id = Column(Integer, autoincrement=True, primary_key=True)
    # Filled in by the database when the row is inserted.
    queue_time = Column(DateTime(timezone=True), server_default=func.now())
    day = Column(Date)
    # Station codes, as stored in Train.orig_iata / Train.dest_iata.
    origin = Column(String(5))
    destination = Column(String(5))
    # Both stay NULL until the simulator has answered.
    response_time = Column(DateTime(timezone=True), nullable=True, default=None)
    expiration_time = Column(DateTime(timezone=True), nullable=True, default=None)
2023-02-12 17:22:40 +01:00
2023-02-13 13:12:39 +01:00
# Refresh the local `tgvmax.csv` copy of the SNCF OpenData TGVMax dataset.
# The dataset page is fetched, its embedded JSON-LD metadata is parsed, and the
# CSV is downloaded again only when `dateModified` is newer than the local
# file's modification time.
# NOTE(review): this view looks like a blame export -- commit timestamps are
# interleaved, indentation is stripped and some statements seem to be missing
# (docstring quotes, the bodies of `if chunk:` / `if verbose:` below, and no
# `@cli.command` decorator is visible). Restore them from version control
# before changing the logic.
@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
def update_dataset(verbose: bool = False):
2023-02-12 17:22:40 +01:00
Query the latest version of the SNCF OpenData dataset, as a CSV file.
2023-02-13 13:12:39 +01:00
resp = requests.get('https://ressources.data.sncf.com/explore/dataset/tgvmax/information/')
# Keep only the content of the first <script type="application/ld+json"> tag.
content = resp.content.decode().split('<script type="application/ld+json">')[1].split('</script>')[0].strip()
# Escape the line breaks found in a few known contexts before json.loads();
# presumably they sit inside JSON string values -- TODO confirm.
content = content.replace('\r', '')
content = content.replace('" \n', '" \\n')
content = content.replace('.\n', '.\\n')
content = content.replace('\n\n \nLa', '\\n\\n \\nLa')
content = content.replace('\n"', '\\n"')
info = json.loads(content)
modified_date = datetime.fromisoformat(info['dateModified'])
utc = timezone('UTC')
# Modification time of the local copy as an aware UTC datetime; year 1 when the
# file does not exist, so the comparison below is true.
last_modified = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=utc) if os.path.isfile(
'tgvmax.csv') else datetime(1, 1, 1, tzinfo=utc)
if last_modified < modified_date:
if verbose:
2023-02-12 15:59:00 +01:00
print("Updating tgvmax.csv…")
2023-02-13 13:12:39 +01:00
# Stream the CSV in 512 KiB chunks; tqdm is only used in verbose mode.
with requests.get(info['distribution'][0]['contentUrl'], stream=True) as resp:
with open('tgvmax.csv', 'wb') as f:
with tqdm(unit='io', unit_scale=True) if verbose else nullcontext() as t:
for chunk in resp.iter_content(chunk_size=512 * 1024):
if chunk:
if verbose:
2023-02-12 15:59:00 +01:00
2023-02-13 13:12:39 +01:00
# Stamp the file with the dataset's modification date, which is what the
# comparison above reads back on the next run.
os.utime('tgvmax.csv', (modified_date.timestamp(), modified_date.timestamp()))
if verbose:
2023-02-12 15:59:00 +01:00
2023-02-13 13:12:39 +01:00
if verbose:
print("Last modification:", modified_date)
2023-02-12 15:59:00 +01:00
2023-02-12 17:22:40 +01:00
# Read `tgvmax.csv` (semicolon-separated) and turn its rows into Train objects.
# NOTE(review): this view looks like a blame export -- commit timestamps are
# interleaved, indentation is stripped and many statements are missing (most
# keyword arguments of `Train(...)`, the bodies of both `if flush:` branches,
# the header-skipping `continue`, the calls that persist the rows, and no
# `@cli.command` decorator is visible). Restore from version control first.
# NOTE(review): CSV columns read here: [0], [1], [4], [5] build the id, [3] is
# compared with "IC NUIT", [10] == 'OUI' sets the tgvmax flag. Presumably
# 0=day, 1=train number, 3=axe, 4/5=origin/destination codes -- confirm against
# the CSV header.
@click.option('-F', '--flush', type=bool, is_flag=True, help="Flush the database before filling it.")
2023-02-13 13:12:39 +01:00
@click.option('--verbose', '-v', is_flag=True, help="Display errors.")
def parse_trains(flush: bool = False, verbose: bool = False):
2023-02-12 17:22:40 +01:00
Parse the CSV file and store it to the database.
if flush:
2023-02-13 13:12:39 +01:00
if verbose:
print("Flush database…")
2023-02-12 17:22:40 +01:00
# Modification time of the CSV as an aware UTC datetime.
last_modification = datetime.utcfromtimestamp(os.path.getmtime('tgvmax.csv')).replace(tzinfo=timezone('UTC'))
2023-02-12 15:59:00 +01:00
with open('tgvmax.csv') as f:
# The first row is treated specially (presumably the header row).
first_line = True
2023-02-12 17:22:40 +01:00
# Ids already met in this file, used to detect duplicated rows.
already_seen = set()
2023-02-13 13:12:39 +01:00
# In verbose mode the reader is wrapped in tqdm; otherwise it is used as is.
for line in (tqdm if verbose else lambda x: x)(csv.reader(f, delimiter=';')):
2023-02-12 15:59:00 +01:00
if first_line:
first_line = False
2023-02-12 17:22:40 +01:00
train_id = f"{line[1]}-{line[0]}-{line[4]}-{line[5]}"
if train_id in already_seen:
# Some trains are mysteriously duplicated, concerns only some « Intercités de nuit »
# and the Brive-la-Gaillarde -- Paris
# and, maybe, for Roubaix-Tourcoing
# Only duplicates outside those known cases are reported.
if line[3] != "IC NUIT" and line[1] != '3614' and not (line[4] == 'FRADP' and line[5] == 'FRADM'):
print("Duplicate:", train_id)
2023-02-12 15:59:00 +01:00
2023-02-12 17:22:40 +01:00
train = Train(
tgvmax=line[10] == 'OUI',
if flush:
2023-02-12 15:59:00 +01:00
2023-02-12 17:22:40 +01:00
# Same three known-duplicate cases as above, this time selected rather than excluded.
if line[3] == "IC NUIT" or line[1] == '3614' or (line[4] == 'FRADP' and line[5] == 'FRADM'):
2023-02-12 15:59:00 +01:00
2023-02-12 17:22:40 +01:00
2023-02-12 15:59:00 +01:00
2023-04-25 11:51:15 +02:00
# Search the TGVMax-flagged trains of `day` for routes starting at `origin`.
# Returns `{destination: valid_routes}` when a destination is given, otherwise
# the `per_arr_explore` dict, which is keyed by train destination.
# Along the way, the routes met are queued for a refresh from the simulator
# (queue_routes / queue_route, always with autocommit disabled).
# NOTE(review): this view looks like a blame export -- commit timestamps are
# interleaved, indentation is stripped and several statements are missing (the
# lines that fill `valid_routes` / `per_arr_explore`, the statement after
# "Avoid loops", and whatever follows "Send queued trains to the database").
# Restore from version control before changing the logic.
# NOTE(review): `origin` / `destination` are compared with Train.orig /
# Train.dest here, while queue_route() is given the orig_iata / dest_iata codes.
def find_routes(day: date | datetime, origin: str, destination: str | None,
verbose: bool = False):
if isinstance(day, datetime):
day = day.date()
2023-02-12 23:30:10 +01:00
trains = db.session.query(Train).filter_by(day=day, tgvmax=True).all()
2023-02-12 15:59:00 +01:00
# Trains are walked in order of departure time.
trains.sort(key=lambda train: train.dep)
2023-02-13 12:08:24 +01:00
# For better results later, fetch all trains from the origin or to the destination
# This is not exhaustive, but can be a good approximation
2023-04-25 11:51:15 +02:00
queue_routes(day, origin=origin, verbose=verbose, autocommit=False)
2023-02-13 12:39:42 +01:00
if destination:
2023-04-25 11:51:15 +02:00
queue_routes(day, destination=destination, verbose=verbose, autocommit=False)
2023-02-13 12:08:24 +01:00
2023-02-12 15:59:00 +01:00
# per_arr_explore: train destination -> list of itineraries;
# each itinerary is a list of Train objects.
per_arr_explore = {}
valid_routes = []
2023-04-25 11:51:15 +02:00
for train in (t := tqdm(trains) if verbose else trains):
2023-02-12 15:59:00 +01:00
if train.orig == origin:
2023-02-13 12:08:24 +01:00
# Update from the TGVMax simulator
2023-04-25 11:51:15 +02:00
queue_route(day, train.orig_iata, train.dest_iata, verbose, False)
2023-02-13 12:08:24 +01:00
2023-02-12 15:59:00 +01:00
# A train leaving the origin starts a new one-train itinerary.
it = [train]
2023-02-12 23:30:10 +01:00
if train.dest == destination:
2023-02-12 15:59:00 +01:00
# We hope that we have a direct train
per_arr_explore.setdefault(train.dest, [])
# Try to extend every itinerary recorded under this train's origin station.
for it in list(per_arr_explore.get(train.orig, [])):
if any(train.dest == tr.dest or train.dest == origin for tr in it):
# Avoid loops
last_train = it[-1]
# A connection is possible only if the previous train has arrived by the
# time this one departs (both are compared as times of day).
if last_train.arr <= train.dep:
2023-02-13 12:08:24 +01:00
# Update from the TGVMax simulator, this line can be useful later
2023-04-25 11:51:15 +02:00
queue_route(day, train.orig_iata, train.dest_iata, verbose, False)
2023-02-13 12:08:24 +01:00
2023-02-12 15:59:00 +01:00
new_it = it + [train]
2023-02-12 23:30:10 +01:00
if train.dest == destination:
2023-02-12 15:59:00 +01:00
# Goal is achieved
per_arr_explore.setdefault(train.dest, [])
2023-04-25 11:51:15 +02:00
# Send queued trains to the database
2023-02-13 12:39:42 +01:00
return {destination: valid_routes} if destination else per_arr_explore
2023-02-12 15:59:00 +01:00
2023-04-25 11:51:15 +02:00
# Don't use the decorator to keep the function callable
# The click argument/option decorators are applied here as plain nested calls
# and the result is registered as the `find-routes` command of the `cli` group.
# NOTE(review): the expression is not closed in this view; the innermost call
# presumably wraps `find_routes` -- restore the closing line from version control.
cli.command('find-routes')(click.argument('day', type=click.DateTime(formats=['%Y-%m-%d']))
(click.argument('origin', type=str)
(click.argument('destination', type=str, default=None)
(click.option('--verbose', '-v', type=bool, is_flag=True, help="Display errors.")
# Add one (day, origin, destination) request to the RouteQueue table.
# Two lookups come before the insert: a request for the same route that is
# still unanswered, and an answered one whose expiration time is not yet past.
# NOTE(review): this view looks like a blame export -- commit timestamps are
# interleaved, indentation is stripped and some statements are missing (the
# docstring quotes, the bodies of both `if query.count():` branches -- which
# presumably return early -- and any use of `verbose` / `autocommit`).
# Restore from version control before changing the logic.
def queue_route(day: date | datetime, origin: str, destination: str, verbose: bool = False, autocommit: bool = True):
2023-02-12 18:18:30 +01:00
Fetch the TGVMax simulator to refresh data.
DAY: The day to query, in format YYYY-MM-DD.
ORIGIN: The origin of the route.
DESTINATION: The destination of the route.
if isinstance(day, datetime):
day = day.date()
# Is the same route already waiting in the queue (no response yet)?
query = db.session.query(RouteQueue).filter_by(day=day, origin=origin, destination=destination, response_time=None)
if query.count():
2023-02-12 23:09:20 +01:00
# Is there an answer for this route that has not expired yet (UTC clock)?
query = db.session.query(RouteQueue).filter(RouteQueue.day == day,
RouteQueue.origin == origin,
RouteQueue.destination == destination,
RouteQueue.expiration_time >= datetime.now(timezone('UTC')))
if query.count():
2023-02-12 18:18:30 +01:00
db.session.add(RouteQueue(day=day, origin=origin, destination=destination))
2023-02-13 12:08:24 +01:00
# Don't use the decorator to keep the function callable
# The click argument/option decorators are applied here as plain nested calls
# and the result is registered as the `queue-route` command of the `cli` group.
# NOTE(review): the expression is not closed in this view; the innermost call
# presumably wraps `queue_route` -- restore the closing line from version control.
cli.command('queue-route')(click.argument('day', type=click.DateTime(formats=['%Y-%m-%d']))
(click.argument('origin', type=str)
(click.argument('destination', type=str)
(click.option('--verbose', '-v', type=bool, is_flag=True, help="Display errors.")
def queue_routes(day: date | datetime, origin: str | None = None,
                 destination: str | None = None, verbose: bool = False,
                 autocommit: bool = True):
    """
    Queue a simulator refresh for every train of the given day.

    The trains can be restricted to one origin and/or one destination;
    each filter matches either the station code or the station name.
    With no filter, every train of the day is queued.

    DAY: The day to query (a datetime is reduced to its date).
    ORIGIN: Optional origin, as a station code or a station name.
    DESTINATION: Optional destination, as a station code or a station name.
    """
    if isinstance(day, datetime):
        day = day.date()

    query = db.session.query(Train).filter(Train.day == day)
    if origin:
        query = query.filter((Train.orig_iata == origin) | (Train.orig == origin))
    if destination:
        query = query.filter((Train.dest_iata == destination) | (Train.dest == destination))

    trains = query.all()
    # The progress bar only exists in verbose mode
    progress = tqdm(trains) if verbose else trains
    for train in progress:
        if verbose:
            progress.set_description(f"{day}: {train.orig} --> {train.dest}")
        # queue_route() always receives the station codes, whatever the filter matched on
        queue_route(day, train.orig_iata, train.dest_iata, verbose, autocommit)
2023-02-13 12:08:24 +01:00
# Same as above
# The click argument/option decorators are applied here as plain nested calls
# and the result is registered as the `queue-routes` command of the `cli` group.
# NOTE(review): the expression is not closed in this view; the innermost call
# presumably wraps `queue_routes` -- restore the closing line from version control.
cli.command('queue-routes')(click.argument('day', type=click.DateTime(formats=['%Y-%m-%d']))
(click.option('--origin', '-o', default=None)
(click.option('--destination', '-d', default=None)
(click.option('--verbose', '-v', type=bool, is_flag=True, help="Display errors.")
2023-02-12 18:18:30 +01:00
# `process-queue` command: send the oldest unanswered RouteQueue requests to the
# TGVMax simulator and copy the answers onto the matching Train rows.
# NUMBER (default 30) caps how many requests are handled; a value <= 0 means all.
# NOTE(review): this view looks like a blame export -- commit timestamps are
# interleaved, indentation is stripped and several statements are missing (the
# end of the `requests.post(...)` call, the body of the 429 branch, the end of
# the 404 branch, the end of the Train lookup and its fallback, and the body of
# `if i % 50 == 0:`). Restore from version control before changing the logic.
@cli.command('process-queue', help="Process the waiting list to refresh from the simulator.")
2023-02-13 12:08:24 +01:00
@click.argument('number', default=30, type=int)
2023-04-25 11:51:15 +02:00
@click.option('--verbose', '-v', type=bool, is_flag=True, help="Display errors.")
def process_queue(number: int, verbose: bool = False):
# Unanswered requests, oldest first.
# NOTE(review): the whole queue is loaded and then sliced in Python; a SQL
# LIMIT would avoid fetching rows that are dropped right after.
queue = db.session.query(RouteQueue).filter_by(response_time=None).order_by(RouteQueue.queue_time).all()
2023-02-12 18:18:30 +01:00
if number > 0:
queue = queue[:number]
URL = "https://www.maxjeune-tgvinoui.sncf/api/public/refdata/search-freeplaces-proposals"
2023-04-25 11:51:15 +02:00
if verbose:
# Code -> station name mapping, only needed for the progress description.
# NOTE(review): built from origin stations only, yet also indexed with
# req.destination below -- a station that is never an origin would raise
# KeyError; confirm this cannot happen.
query = db.session.query(Train).with_entities(Train.orig_iata, Train.orig).distinct()
iata_to_names = {k: v for (k, v) in query.all()}
for i, req in enumerate(t := tqdm(queue) if verbose else queue):
2023-02-12 18:18:30 +01:00
req: RouteQueue
2023-04-25 11:51:15 +02:00
if verbose:
t.set_description(f"{req.day:%d/%m/%Y}: {iata_to_names[req.origin]} --> {iata_to_names[req.destination]}")
# Repeat the request for as long as the server answers 429.
resp = None
while resp is None or resp.status_code == 429:
resp = requests.post(URL, json={
'departureDateTime': req.day.isoformat(),
'origin': req.origin,
'destination': req.destination,
if resp.status_code == 429:
2023-02-12 18:18:30 +01:00
if resp.status_code == 404:
# No travel found
# NOTE(review): datetime.now() is naive here, while the columns are declared
# timezone-aware and the other branch stores UTC-aware values -- confirm.
req.response_time = datetime.now()
2023-04-25 11:51:15 +02:00
req.expiration_time = datetime.now() + timedelta(hours=3)
2023-02-12 18:18:30 +01:00
data = resp.json()
# `updatedAt` / `expiresAt` are divided by 1000 before use as timestamps
# (presumably milliseconds since the epoch); both are stored as UTC-aware.
req.response_time = datetime.utcfromtimestamp(data['updatedAt'] // 1000).replace(tzinfo=timezone('UTC'))
req.expiration_time = datetime.utcfromtimestamp(data['expiresAt'] // 1000).replace(tzinfo=timezone('UTC'))
2023-02-12 23:48:10 +01:00
req.expiration_time += timedelta(hours=3) # By default 5 minutes, extend it to 3 hours to be safe
2023-02-12 18:18:30 +01:00
2023-02-13 12:39:42 +01:00
# Reset every train of this route for the day; the trains listed in the
# proposals are flagged again in the loop below.
db.session.query(Train).filter_by(day=req.day, orig_iata=req.origin, dest_iata=req.destination) \
2023-02-12 18:18:30 +01:00
.update(dict(tgvmax=False, remaining_seats=-1))
for proposal in data['proposals']:
train = db.session.query(Train).filter_by(day=req.day, number=int(proposal['trainNumber']),
2023-04-25 11:51:15 +02:00
2023-02-13 12:08:24 +01:00
if train is None:
# In a city with multiple stations
2023-04-25 11:51:15 +02:00
2023-02-13 12:08:24 +01:00
2023-02-12 18:18:30 +01:00
train.tgvmax = True
train.remaining_seats = proposal['freePlaces']
train.last_modification = req.response_time
train.expiration_time = req.expiration_time
2023-04-25 11:51:15 +02:00
# Something happens every 50 requests (presumably a commit; the body is not visible here).
if i % 50 == 0:
2023-02-12 18:18:30 +01:00
2023-02-12 15:59:00 +01:00
def index():
    """
    Render the home page.

    The template receives today's date as ``today`` and the date 30 days
    later as ``max_day``.
    """
    # Read the clock once so both values are derived from the same day,
    # even when the request is served around midnight
    today = date.today()
    return render_template('index.html', today=today, max_day=today + timedelta(days=30))
def iata_codes():
    """
    Return the two-way mapping between station codes and station names.

    The mapping is built from the distinct (orig_iata, orig) pairs of the
    Train table, so only stations that appear as an origin are listed.

    :return: ``{'iata2name': {code: name}, 'name2iata': {name: code}}``
    """
    query = db.session.query(Train).with_entities(Train.orig_iata, Train.orig).distinct()
    # Run the SELECT once and build both directions from the same rows
    pairs = query.all()
    return {
        'iata2name': {code: name for code, name in pairs},
        'name2iata': {name: code for code, name in pairs},
    }
2023-02-12 15:59:00 +01:00
2023-02-12 23:30:10 +01:00
2023-02-13 12:08:24 +01:00
def get_routes(day: date | str, origin: str, destination: str):
    """
    Return the routes computed by find_routes() as plain dicts and lists.

    :param day: The travel day, as a date or as an ISO string (YYYY-MM-DD).
    :param origin: The origin, passed unchanged to find_routes().
    :param destination: The destination; the literal string 'undefined'
        stands for "no destination".
    :return: A dict mapping each city to its list of routes; a route is the
        list of its trains, each train being a dict.
    """
    if isinstance(day, str):
        day = date.fromisoformat(day)

    if destination == 'undefined':
        destination = None

    routes = find_routes(day, origin, destination)

    return {
        city: [[_train_to_dict(tr) for tr in route] for route in city_routes]
        for city, city_routes in routes.items()
    }


def _train_to_dict(tr):
    """Serialize one Train into the dict exposed by get_routes()."""
    return {
        'origin': tr.orig,
        'origin_iata': tr.orig_iata,
        'destination': tr.dest,
        'destination_iata': tr.dest_iata,
        'departure': tr.dep.isoformat(),
        'arrival': tr.arr.isoformat(),
        'number': tr.number,
        'free_seats': tr.remaining_seats,
    }
2023-02-12 23:30:10 +01:00
2023-02-12 15:59:00 +01:00
if __name__ == '__main__':