Commit 885d4d0f authored by LaurentMT's avatar LaurentMT

Merge branch 'develop' into 'master'

Merge develop into master for new version

See merge request !9
parents 88b9fee9 d703cb00
......@@ -107,3 +107,4 @@ dmypy.json
.venv/
.vscode/
.env
private_tests/
......@@ -41,6 +41,10 @@ requests[socks]
plotly >= 4.1.0
numpy >= 1.11.0
datasketch
## Installation
......@@ -86,6 +90,19 @@ download export help load plot quit score socks5 workdir
wst#/tmp>
```
Show help for a specific command
```
wst#/tmp> help socks5
Displays or sets the url:port of a socks5 proxy used to download snapshots files from OXT.
Examples:
socks5 127.0.0.1:9050 => sets the ip:port of your socks5 proxy
socks5 none => removes the socks5 proxy previously defined
socks5 => displays the current working directory
wst#/tmp>
```
Set Socks5 proxy before downloading data from OXT
```
wst#/tmp> socks5 127.0.0.1:9150
......@@ -115,7 +132,22 @@ Download complete
wst#/home/laurent/whirlpool>
```
Load and compute the statistcs for the snaphot
Display a few metrics for a transaction (mix or TX0) stored in a snapshot of the working directory
```
wst#/home/laurent/whirlpool> score 4e72519d391ce83e0659c9022a00344bedbb253de1747cf290162b3d3ea51479
Backward-looking metrics for the outputs of this mix:
anonset = 92
spread = 89%
Forward-looking metrics for the outputs of Tx0s having this transaction as their first mix:
anonset = 127
spread = 76%
wst#/home/laurent/whirlpool>
```
Load and compute the statistics for the snapshot
```
wst#/home/laurent/whirlpool> load 05
......@@ -145,21 +177,6 @@ Preparing the chart...
wst#/home/laurent/whirlpool>
```
Display the metrics computed for a transaction stored in the active snapshot
```
wst#/home/laurent/whirlpool> score 4e72519d391ce83e0659c9022a00344bedbb253de1747cf290162b3d3ea51479
Backward-looking metrics for the outputs of this mix:
anonset = 92
spread = 89%
Forward-looking metrics for the outputs of Tx0s having this transaction as their first mix:
anonset = 127
spread = 76%
wst#/home/laurent/whirlpool>
```
Quit WST
```
wst#/home/laurent/whirlpool> quit
......
PySocks
requests[socks]
plotly >= 4.1.0
\ No newline at end of file
plotly >= 4.1.0
numpy >= 1.11.0
datasketch
\ No newline at end of file
......@@ -28,6 +28,8 @@ setup(
install_requires=[
'PySocks',
'requests[socks]',
'plotly >= 4.1.0'
'plotly >= 4.1.0',
'numpy >= 1.11.0',
'datasketch'
]
)
......@@ -4,6 +4,8 @@ Copyright (c) 2019 Katana Cryptographic Ltd. All Rights Reserved.
A class computing a set of metrics for the mixed UTXOs (backward-looking)
'''
from collections import defaultdict
from datasketch import HyperLogLogPlusPlus
from whirlpool_stats.utils.constants import *
from whirlpool_stats.utils.date import get_datetime_of_day
......@@ -36,6 +38,18 @@ class BackwardMetrics(object):
'''
print('Start computing metrics (backward-looking)')
# Number of mixes
nb_mixes = len(self.snapshot.l_mix_txs)
# Mapping mix_tiid => {
# 'nb_refs': number of remaining refs from following mixes,
# 'ancestors': HLL++ storing ancestors tx0s
# }
d_mix_ancestors = dict()
# Mapping day => set of active tx0s
d_tmp_active_tx0s = defaultdict(set)
# Resets data structures storing the results
self.l_anonsets = []
self.l_spreads = []
......@@ -43,61 +57,51 @@ class BackwardMetrics(object):
self.d_inflow = defaultdict(int)
self.d_nb_active_tx0s = defaultdict(int)
# Dictionary day => set od active tx0s
d_tmp_active_tx0s = defaultdict(set)
# Iterates over the ordered list of mix txs
# and computes their anonsets and spreads (backward-looking)
mix_round = 0
nb_mixes = len(self.snapshot.l_mix_txs)
for tiid in self.snapshot.l_mix_txs:
# Resets the set of txs already reached during this walk
self.s_processed_txs.clear()
# Computes the anonset
anonset = self.get_nb_sources(tiid)
day = get_datetime_of_day(self.snapshot.l_ts_mix_txs[mix_round])
hll = HyperLogLogPlusPlus(p=HLL_P)
for prev_tiid in self.snapshot.d_reverse_links[tiid]:
if prev_tiid in self.snapshot.s_tx0s:
# Adds the tx0 to the HLL
hll.update(str(prev_tiid).encode('utf8'))
self.d_inflow[day] += 1
d_tmp_active_tx0s[day].add(prev_tiid)
else:
prev_info = d_mix_ancestors[prev_tiid]
# Merges the HLL with the HLL of the parent
hll.merge(prev_info['ancestors'])
if prev_info['nb_refs'] == 1:
del d_mix_ancestors[prev_tiid]
else:
prev_info['nb_refs'] -= 1
d_mix_ancestors[prev_tiid] = prev_info
# Computes and stores the anonset
anonset = round(hll.count())
self.l_anonsets.append(anonset)
# Computes the spread
# Computes and stores the spread
nb_past_tx0s = len(list(filter(lambda x: x < tiid, self.snapshot.l_tx0s)))
spread = float(anonset) * 100.0 / float(nb_past_tx0s)
self.l_spreads.append(spread)
# Updates activity metrics
day = get_datetime_of_day(self.snapshot.l_ts_mix_txs[mix_round])
# Increases the numbers of daily mixes
self.d_nb_mixes[day] += 1
prev_tiids = self.snapshot.d_reverse_links[tiid]
for prev_tiid in prev_tiids:
if prev_tiid in self.snapshot.s_tx0s:
self.d_inflow[day] += 1
d_tmp_active_tx0s[day].add(prev_tiid)
# Stores info about current mix
d_mix_ancestors[tiid] = {
'nb_refs': len(self.snapshot.d_links[tiid]),
'ancestors': hll
}
# Displays a trace
if mix_round % 100 == 0:
pct_progress = mix_round * 100 / nb_mixes
print(' Computed metrics for round %d (%d%%)' % (mix_round, pct_progress))
# Updates the mix round
mix_round += 1
# Fills d_nb_active_tx0s
for k,v in d_tmp_active_tx0s.items():
self.d_nb_active_tx0s[k] = len(v)
print('Done!')
d_mix_ancestors.clear()
def get_nb_sources(self, tiid):
'''
Gets the number of ancestor tx0s found for a tx
Parameters:
tiid = id of the transaction
'''
nb_tx0s = 0
prev_tiids = self.snapshot.d_reverse_links[tiid]
for prev_tiid in prev_tiids:
if prev_tiid not in self.s_processed_txs:
if prev_tiid in self.snapshot.s_mix_txs:
nb_tx0s += self.get_nb_sources(prev_tiid)
elif prev_tiid in self.snapshot.s_tx0s:
nb_tx0s += 1
self.s_processed_txs.add(prev_tiid)
self.s_processed_txs.add(tiid)
return nb_tx0s
print('Done!')
......@@ -3,6 +3,8 @@ Copyright (c) 2019 Katana Cryptographic Ltd. All Rights Reserved.
A class computing a set of metrics for the mixed UTXOs (forward-looking)
'''
import sys
from datasketch import HyperLogLogPlusPlus
from whirlpool_stats.utils.constants import *
......@@ -15,8 +17,6 @@ class ForwardMetrics(object):
snapshot = snapshot
'''
self.snapshot = snapshot
# Set of txs that have been processed
self.s_processed_txs = set()
# List of anonsets ordered by mix round
self.l_anonsets = []
# List of spreads ordered by mix round
......@@ -29,52 +29,69 @@ class ForwardMetrics(object):
'''
print('Start computing metrics (forward-looking)')
# Number of mixes
nb_mixes = len(self.snapshot.l_mix_txs)
# Forward number of utxos
# (number of UTXOs created after current mix included)
fwd_nb_utxos = 0
# Mapping mix_tiid => {
# 'nb_refs': number of remaining refs from previous mixes,
# 'descendants': HLL++ storing descendants UTXOs
# }
d_mix_descendants = dict()
# Resets data structures storing the results
self.l_anonsets = []
self.l_spreads = []
# Iterates over the ordered list of mix txs
# and computes their anonset
mix_round = 0
nb_mixes = len(self.snapshot.l_mix_txs)
for tiid in self.snapshot.l_mix_txs:
# Resets the set of txs already reached during this walk
self.s_processed_txs.clear()
# Computes the anonset
anonset = self.get_nb_descendants(tiid)
mix_round = nb_mixes - 1
for tiid in reversed(self.snapshot.l_mix_txs):
hll = HyperLogLogPlusPlus(p=HLL_P)
next_tiids = self.snapshot.d_links[tiid]
nb_utxos = NB_PARTICIPANTS - len(next_tiids)
# Adds an entry in the HLL for each UTXO
for i in range(0, nb_utxos):
utxo_id = '%d-%d' % (tiid, i)
hll.update(utxo_id.encode('utf8'))
# Updates the forward number of utxos
fwd_nb_utxos += nb_utxos
# Computes the set of descendants mixes (current mix included)
for next_tiid in next_tiids:
next_info = d_mix_descendants[next_tiid]
# Merges the HLL with the HLL of the following mix (descendant)
hll.merge(next_info['descendants'])
if next_info['nb_refs'] == 1:
del d_mix_descendants[next_tiid]
else:
next_info['nb_refs'] -= 1
d_mix_descendants[next_tiid] = next_info
# Computes and stores the anonset
anonset = round(hll.count())
self.l_anonsets.append(anonset)
# Computes the spread
nb_later_unmixed_txos = 0
for j in range(mix_round, len(self.snapshot.l_mix_txs)):
tiid_round_j = self.snapshot.l_mix_txs[j]
nb_remixes = len(self.snapshot.d_links[tiid_round_j])
nb_later_unmixed_txos += NB_PARTICIPANTS - nb_remixes
spread = float(anonset) * 100.0 / float(nb_later_unmixed_txos)
# Computes and stores the spread
spread = float(anonset) * 100.0 / float(fwd_nb_utxos)
self.l_spreads.append(spread)
# Stores info about current mix
prev_tiids = self.snapshot.d_reverse_links[tiid]
nb_refs = sum([1 if p_tiid in self.snapshot.s_mix_txs else 0 for p_tiid in prev_tiids])
d_mix_descendants[tiid] = {
'nb_refs': nb_refs,
'descendants': hll
}
# Displays a trace
if mix_round % 100 == 0:
pct_progress = mix_round * 100 / nb_mixes
if (nb_mixes - mix_round) % 100 == 0:
pct_progress = (nb_mixes - mix_round) * 100 / nb_mixes
print(' Computed metrics for round %d (%d%%)' % (mix_round, pct_progress))
mix_round += 1
print('Done!')
mix_round -= 1
# Reverses the lists
self.l_anonsets = list(reversed(self.l_anonsets))
self.l_spreads = list(reversed(self.l_spreads))
def get_nb_descendants(self, tiid):
'''
Gets the number of descendant UTXOs composing the forward-looking anonset of a tx
(= number of unspents + number of mixed txos that have left the pool)
Parameters:
tiid = id of the transaction
'''
next_tiids = self.snapshot.d_links[tiid]
nb_utxos = NB_PARTICIPANTS - len(next_tiids)
for next_tiid in next_tiids:
if next_tiid not in self.s_processed_txs:
if next_tiid in self.snapshot.s_mix_txs:
nb_utxos += self.get_nb_descendants(next_tiid)
self.s_processed_txs.add(tiid)
return nb_utxos
d_mix_descendants.clear()
print('Done!')
......@@ -59,7 +59,7 @@ class Snapshot(object):
self.snapshots_dir = snapshots_dir
def load(self, denom):
def load(self, denom, show_logs=True):
'''
Loads the snapshot for a given denomination
Parameters:
......@@ -69,7 +69,7 @@ class Snapshot(object):
self.reset_data()
self.denom = denom
print('Start loading snapshot for %s denomination' % self.denom)
show_logs and print('Start loading snapshot for %s denomination' % self.denom)
# Loads the mix txs
filename = '%s_%s.csv' % (FN_MIX_TXS, self.denom)
......@@ -89,7 +89,7 @@ class Snapshot(object):
self.l_ts_mix_txs.append(ts)
mix_round += 1
print(' Mix txs loaded')
show_logs and print(' Mix txs loaded')
# Loads the tx0s
filename = '%s_%s.csv' % (FN_TX0S, self.denom)
......@@ -109,7 +109,7 @@ class Snapshot(object):
nb_utxos = int(row[3])
self.l_utxos_tx0s.append(nb_utxos)
print(' Tx0s loaded')
show_logs and print(' Tx0s loaded')
# Loads the relationships between txs
filename = '%s_%s.csv' % (FN_LINKS, self.denom)
......@@ -124,7 +124,7 @@ class Snapshot(object):
self.d_links[src].append(tgt)
self.d_reverse_links[tgt].append(src)
print(' Tx links loaded')
show_logs and print(' Tx links loaded')
print('Done!')
show_logs and print('Done!')
'''
Copyright (c) 2019 Katana Cryptographic Ltd. All Rights Reserved.
A class computing a set of scores for a single transaction (mix or Tx0)
'''
from whirlpool_stats.utils.constants import *
from whirlpool_stats.services.snapshot import Snapshot
class TxScores(object):
    '''
    Computes a few scores for a single transaction (mix or Tx0)
    stored in a snapshot.
    '''

    def __init__(self, snapshot):
        '''
        Constructor
        Parameters:
          snapshot = snapshot
        '''
        self.snapshot = snapshot
        # Set of txs already reached during the current graph walk
        self.s_processed_txs = set()

    @staticmethod
    def _txid_prefix(txid):
        '''
        Returns the prefix of a txid used as key in the snapshot indexes
        (first 2*TXID_PREFIX_LENGTH characters of the txid)
        Parameters:
          txid = Transaction id
        '''
        return txid[0:2*TXID_PREFIX_LENGTH]

    @staticmethod
    def _percentage(part, total):
        '''
        Returns part/total as a percentage (float)
        Returns 0.0 when total is 0 instead of raising ZeroDivisionError
        Parameters:
          part  = numerator
          total = denominator
        '''
        if total == 0:
            return 0.0
        return float(part) * 100.0 / float(total)

    def compute(self, txid, denom=None):
        '''
        Computes a set of scores
        for a transaction (tx0 or mix) identified by its TXID
        Returns a dictionary of scores, or None if the denomination code
        is invalid or if the transaction can't be found
        Parameters:
          txid  = Transaction id
          denom (optional) = Pool denomination (05, 005, 001)
                             If None, the denomination is searched
                             in the available snapshots
        '''
        print('Processing scores for transaction %s' % txid)

        if denom is None:
            denom = self.find_denom(txid)
            if denom is None:
                return None
        elif denom not in ALL_DENOMS:
            print('Invalid denomination code')
            return None

        # Loads the snapshot silently (show_logs=False)
        self.snapshot.load(denom, False)

        txid_prefix = self._txid_prefix(txid)

        if txid_prefix in self.snapshot.d_txids:
            # Transaction is a mix
            fwd_scores = self.compute_fwd_scores(txid)
            bwd_scores = self.compute_bwd_scores(txid)
            return {
                'type': 'mix',
                'fwd_anonset': fwd_scores['anonset'],
                'fwd_spread': fwd_scores['spread'],
                'bwd_anonset': bwd_scores['anonset'],
                'bwd_spread': bwd_scores['spread']
            }

        if txid_prefix in self.snapshot.d_tx0s:
            # Transaction is a Tx0
            tx0_scores = self.compute_tx0_scores(txid)
            return {
                'type': 'tx0',
                'nb_outs': tx0_scores['nb_outs'],
                'nb_counterparties': tx0_scores['nb_counterparties']
            }

        # Transaction is unknown for this denomination
        return None

    def find_denom(self, txid):
        '''
        Searches a txid in available snapshots
        Returns the associated denom code if txid was found in a snapshot
        otherwise returns None
        Parameters:
          txid = Transaction id
        '''
        prefix = self._txid_prefix(txid)
        # Uses a temporary snapshot, so that the active snapshot is left untouched
        tmp_snapshot = Snapshot(self.snapshot.snapshots_dir)

        for denom in ALL_DENOMS:
            try:
                tmp_snapshot.load(denom, False)
            except Exception:
                # Best effort: a snapshot that can't be loaded is skipped.
                # Exception (instead of a bare except) lets KeyboardInterrupt
                # and SystemExit propagate.
                continue
            if (prefix in tmp_snapshot.d_txids) or (prefix in tmp_snapshot.d_tx0s):
                return denom

        return None

    def compute_tx0_scores(self, txid):
        '''
        Computes a few metrics for a tx0 identified by its txid
        Returns a dictionary with
          'nb_outs'           = number of outputs of the Tx0 spent by a mix
          'nb_counterparties' = number of other Tx0s found as inputs
                                of the first mixes of the Tx0
        Parameters:
          txid = Transaction id
        '''
        tiid = self.snapshot.d_tx0s[self._txid_prefix(txid)]

        # Gets the number of spent outputs for the current Tx0
        first_mixes = self.snapshot.d_links[tiid]
        nb_spent_txos = len(first_mixes)

        # Lists the Tx0s acting as counterparties
        # for the first mixes of the current Tx0
        s_counterparties = set()
        for tiid_mix in first_mixes:
            for prev_tiid in self.snapshot.d_reverse_links[tiid_mix]:
                # Checks if counterparty comes from a Tx0
                if prev_tiid in self.snapshot.s_tx0s:
                    s_counterparties.add(prev_tiid)

        # Removes the current Tx0 from its own counterparties.
        # discard() (instead of len - 1) avoids returning -1
        # for a Tx0 without any spent output.
        s_counterparties.discard(tiid)

        # Returns the result
        return {
            'nb_outs': nb_spent_txos,
            'nb_counterparties': len(s_counterparties)
        }

    def compute_fwd_scores(self, txid):
        '''
        Computes the forward-looking anonset and spread
        Returns a dictionary with keys 'anonset' and 'spread'
        Parameters:
          txid = Transaction id
        '''
        mix_round = self.snapshot.d_txids[self._txid_prefix(txid)]
        tiid = self.snapshot.l_mix_txs[mix_round]

        # Resets the set of txs already reached during this walk
        self.s_processed_txs.clear()

        # Computes the anonset
        anonset = self.get_nb_descendants(tiid)

        # Computes the spread: the anonset is compared to the number of txos
        # which haven't been remixed, for all the mixes from this round onwards
        nb_later_unmixed_txos = sum(
            NB_PARTICIPANTS - len(self.snapshot.d_links[later_tiid])
            for later_tiid in self.snapshot.l_mix_txs[mix_round:]
        )
        spread = self._percentage(anonset, nb_later_unmixed_txos)

        # Returns the result
        return {
            'anonset': anonset,
            'spread': spread
        }

    def compute_bwd_scores(self, txid):
        '''
        Computes the backward-looking anonset and spread
        Returns a dictionary with keys 'anonset' and 'spread'
        Parameters:
          txid = Transaction id
        '''
        mix_round = self.snapshot.d_txids[self._txid_prefix(txid)]
        tiid = self.snapshot.l_mix_txs[mix_round]

        # Resets the set of txs already reached during this walk
        self.s_processed_txs.clear()

        # Computes the anonset
        anonset = self.get_nb_sources(tiid)

        # Computes the spread: the anonset is compared to the number of tx0s
        # having an id lower than the id of the mix
        nb_past_tx0s = sum(1 for tx0_tiid in self.snapshot.l_tx0s if tx0_tiid < tiid)
        spread = self._percentage(anonset, nb_past_tx0s)

        # Returns the result
        return {
            'anonset': anonset,
            'spread': spread
        }

    def get_nb_descendants(self, tiid):
        '''
        Gets the number of descendant UTXOs composing the forward-looking anonset of a tx
        (= number of unspents + number of mixed txos that have left the pool)
        Txs reached during the walk are added to self.s_processed_txs
        and txs already stored in this set are skipped.
        Parameters:
          tiid = id of the transaction
        '''
        snapshot = self.snapshot
        processed = self.s_processed_txs
        nb_utxos = 0

        # Iterative walk (explicit stack) of the graph of links:
        # a recursive walk fails with a RecursionError on long chains of mixes
        processed.add(tiid)
        stack = [tiid]
        while stack:
            current = stack.pop()
            next_tiids = snapshot.d_links[current]
            # Outputs without a link to a following tx
            nb_utxos += NB_PARTICIPANTS - len(next_tiids)
            for next_tiid in next_tiids:
                if (next_tiid not in processed) and (next_tiid in snapshot.s_mix_txs):
                    processed.add(next_tiid)
                    stack.append(next_tiid)

        return nb_utxos

    def get_nb_sources(self, tiid):
        '''
        Gets the number of ancestor tx0s found for a tx
        Txs reached during the walk are added to self.s_processed_txs
        and txs already stored in this set are skipped.
        Parameters:
          tiid = id of the transaction
        '''
        snapshot = self.snapshot
        processed = self.s_processed_txs
        nb_tx0s = 0

        # Iterative walk (explicit stack) of the graph of reverse links:
        # a recursive walk fails with a RecursionError on long chains of mixes
        processed.add(tiid)
        stack = [tiid]
        while stack:
            current = stack.pop()
            for prev_tiid in snapshot.d_reverse_links[current]:
                if prev_tiid in processed:
                    continue
                if prev_tiid in snapshot.s_mix_txs:
                    # Ancestor is a mix: its own ancestors must be visited
                    processed.add(prev_tiid)
                    stack.append(prev_tiid)
                elif prev_tiid in snapshot.s_tx0s:
                    # Ancestor is a tx0: counted once
                    processed.add(prev_tiid)
                    nb_tx0s += 1

        return nb_tx0s
......@@ -34,3 +34,7 @@ NB_PARTICIPANTS = 5
# TXID prefix length (in bytes)
TXID_PREFIX_LENGTH = 8
# HYPERLOGLOG P VALUE
# P=14 should give us an error rate <~ 0.01
HLL_P = 14
......@@ -19,6 +19,7 @@ from whirlpool_stats.services.backward_metrics import BackwardMetrics
from whirlpool_stats.services.tx0s_metrics import Tx0sMetrics
from whirlpool_stats.services.exporter import Exporter
from whirlpool_stats.services.metrics_plotter import Plotter
from whirlpool_stats.services.tx_scores import TxScores
class WhirlpoolStats(Cmd):
......@@ -144,48 +145,52 @@ Examples:
def do_score(self, args):
'''
Displays the metrics for a mix tx identified by its txid
Displays the metrics for a mix tx identified by its txid