修訂 | 7f7bbc7162727ce236b32d4b607f70445a4dd397 (tree) |
---|---|
時間 | 2022-04-10 02:19:25 |
作者 | dyknon <dyknon@user...> |
Commiter | dyknon |
works
@@ -2,11 +2,11 @@ | ||
2 | 2 | |
3 | 3 | use warnings; |
4 | 4 | use strict; |
5 | -use feature qw(say state postderef); | |
5 | +use feature qw(say state postderef bitwise); | |
6 | 6 | |
7 | 7 | use Getopt::Long; |
8 | 8 | use Pod::Usage; |
9 | -use List::Util qw(reduce); | |
9 | +use List::Util qw(reduce sum0); | |
10 | 10 | use JSON; |
11 | 11 | use LWP::UserAgent (); |
12 | 12 | use HTTP::Date; |
@@ -22,7 +22,7 @@ my $asn; | ||
22 | 22 | my $ipv4 = 1; |
23 | 23 | my $ipv6 = 1; |
24 | 24 | my $prefix = ""; |
25 | -my @sort = ("start", "region"); | |
25 | +my $cidr; | |
26 | 26 | my $help; |
27 | 27 | |
28 | 28 | GetOptions( |
@@ -34,17 +34,12 @@ GetOptions( | ||
34 | 34 | "orig-rotate=i" => \$orig_rotate, |
35 | 35 | "orig-prefix=s" => \$orig_prefix, |
36 | 36 | "prefix=s" => \$prefix, |
37 | - "sort=s" => \@sort, | |
37 | + "cidr!" => \$cidr, | |
38 | 38 | "help|h|?" => \$help, |
39 | 39 | ) or pod2usage(16); |
40 | 40 | pod2usage(0) if($help); |
41 | 41 | pod2usage(17) if(@ARGV > 1); |
42 | 42 | my $op = $ARGV[0] // "generate"; |
43 | -my @ill_sort = grep{$_ ne "start" && $_ ne "region"}@sort; | |
44 | -if(@ill_sort){ | |
45 | - say STDERR "Unknown sort key: ", join(", ", @ill_sort), "\n"; | |
46 | - pod2usage(18); | |
47 | -} | |
48 | 43 | |
49 | 44 | sub mydie{ |
50 | 45 | my ($exit, @msg) = @_; |
@@ -158,19 +153,21 @@ my @bitmap1 = ( | ||
158 | 153 | "\x{0f}", "\x{1f}", "\x{3f}", "\x{7f}"); |
159 | 154 | sub leftfill{ "\x{0}" x ($_[1]-length($_[0])) . $_[0] } |
160 | 155 | sub cntr0{ |
161 | - my $c; | |
156 | + my $c = 0; | |
162 | 157 | for my $v(reverse split(//, $_[0])){ |
163 | 158 | if($v eq "\x{00}"){ |
164 | 159 | $c += 8; |
165 | 160 | next; |
166 | 161 | } |
167 | 162 | for(reverse 1 .. 7){ |
168 | - if(($v & $bitmap1[$_]) eq "\x{00}"){ | |
163 | + if(($v &. $bitmap1[$_]) eq "\x{00}"){ | |
169 | 164 | $c += $_; |
165 | + last; | |
170 | 166 | } |
171 | 167 | } |
172 | - return $c; | |
168 | + last; | |
173 | 169 | } |
170 | + $c; | |
174 | 171 | } |
175 | 172 | sub num2bin{ pack("L>", $_[0]) =~ s/^\x{0}+//r } |
176 | 173 | sub bin2num{ reduce{$a << 8 | $b}unpack("C*", $_[0]) } |
@@ -187,6 +184,17 @@ sub binadd{ | ||
187 | 184 | push @ret, $carry if($carry); |
188 | 185 | join("", map{chr}@ret); |
189 | 186 | } |
187 | +sub bindec{ | |
188 | + my @v = map{ord}split(//, $_[0]); | |
189 | + my @r; | |
190 | + while($v[$#v] == 0){ | |
191 | + pop @v; | |
192 | + push @r, "\x{ff}"; | |
193 | + die if(!@v); | |
194 | + } | |
195 | + $v[$#v] -= 1; | |
196 | + join("", map{chr}(@v, @r)); | |
197 | +} | |
190 | 198 | my %ascii2addr = ( |
191 | 199 | asn => sub{pack("L>", $_[0])}, |
192 | 200 | ipv4 => sub{pack("C*", split(/\./, $_[0]))}, |
@@ -219,8 +227,205 @@ my %ascii2len = ( | ||
219 | 227 | leftfill($bitmap0[$i%8] . "\x{00}" x int($i/8), 16); |
220 | 228 | }, |
221 | 229 | ); |
230 | +my %len2ascii = ( | |
231 | + asn => sub{unpack("L>", $_[0])}, | |
232 | + ipv4 => sub{unpack("L>", $_[0])}, | |
233 | + ipv6 => sub{ | |
234 | + for(1 .. 128){ | |
235 | + return $_ if($_[0] eq $ascii2len{ipv6}->($_)); | |
236 | + } | |
237 | + "*"; | |
238 | + }, | |
239 | +); | |
240 | +my %asciilen = ( | |
241 | + asn => { | |
242 | + addr => 10, | |
243 | + length => 10, | |
244 | + cidrnm => 2, | |
245 | + }, | |
246 | + ipv4 => { | |
247 | + addr => 15, | |
248 | + length => 10, | |
249 | + cidrnm => 2, | |
250 | + }, | |
251 | + ipv6 => { | |
252 | + addr => 39, | |
253 | + length => 3, | |
254 | + cidrnm => 3, | |
255 | + }, | |
256 | +); | |
257 | +sub sort_by{ | |
258 | + my ($arr, @k) = @_; | |
259 | + sort{reduce{$a || $b}map{$a->{$_} cmp $b->{$_}}@k}@$arr; | |
260 | +} | |
261 | +sub uniq_check{ | |
262 | + my ($arr) = @_; | |
263 | + my @ret; | |
264 | + my $end = "\x{0}" x length $arr->[0]{start}; | |
265 | + for($arr->@*){ | |
266 | + if($end gt $_->{start}){ | |
267 | + push @ret, $_; | |
268 | + } | |
269 | + my $new_end = binadd($_->{start}, $_->{length}); | |
270 | + $end = $new_end if($new_end gt $end); | |
271 | + } | |
272 | + @ret; | |
273 | +} | |
274 | +sub merge_by{ | |
275 | + my ($arr, @k) = @_; | |
276 | + my @ret; | |
277 | + my $end; | |
278 | + my %k; | |
279 | + for my $r(@$arr){ | |
280 | + if( | |
281 | + defined $end | |
282 | + && $end eq $r->{start} | |
283 | + && reduce{$a && $b}map{$k{$_} eq $r->{$_}}@k | |
284 | + ){ | |
285 | + $ret[$#ret] = {$ret[$#ret]->%*}; | |
286 | + delete $ret[$#ret]{cidr}; | |
287 | + $ret[$#ret]{length} = binadd($ret[$#ret]{length}, $r->{length}); | |
288 | + }else{ | |
289 | + $k{$_} = $r->{$_} for(@k); | |
290 | + push @ret, $r; | |
291 | + } | |
292 | + $end = binadd($r->{start}, $r->{length}); | |
293 | + } | |
294 | + @ret; | |
295 | +} | |
296 | +sub get_ascii_formatter{ | |
297 | + my ($type, $cidr, @vals) = @_; | |
298 | + my $addr2ascii = $addr2ascii{$type}; | |
299 | + my $len2ascii = $len2ascii{$type}; | |
300 | + @vals = map{$_->{sub}}@vals; | |
301 | + if($cidr){ | |
302 | + sub{ | |
303 | + my ($r) = @_; | |
304 | + map{ | |
305 | + join("\t", | |
306 | + (map{$_->($r)}@vals), | |
307 | + $addr2ascii->($_->[0]) . "/" . $_->[1] | |
308 | + ) | |
309 | + }$r->{cidr}->@*; | |
310 | + }; | |
311 | + }else{ | |
312 | + sub{ | |
313 | + my ($r) = @_; | |
314 | + join("\t", | |
315 | + (map{$_->($r)}@vals), | |
316 | + $addr2ascii->($r->{start}), | |
317 | + $addr2ascii->(binadd($r->{start}, bindec($r->{length}))), | |
318 | + ); | |
319 | + }; | |
320 | + } | |
321 | +} | |
322 | +sub get_pad_ascii_formatter{ | |
323 | + my ($type, $cidr, @vals) = @_; | |
324 | + my %len_part = $asciilen{$type}->%*; | |
325 | + my $len = @vals | |
326 | + + (sum0 map{$_->{len}}@vals) | |
327 | + + $len_part{addr} | |
328 | + + 1; | |
329 | + if($cidr){ | |
330 | + $len += $len_part{cidrnm}+1; | |
331 | + }else{ | |
332 | + $len += $len_part{addr}+1; | |
333 | + } | |
334 | + my $inner = get_ascii_formatter($type, $cidr, @vals); | |
335 | + sub{ | |
336 | + my $line = $inner->(@_); | |
337 | + my $padn = $len - length $line; | |
338 | + die if($padn < 0); | |
339 | + "$line\t" . " "x$padn; | |
340 | + }; | |
341 | +} | |
342 | +sub get_binary_formatter{ | |
343 | + my ($type, $cidr, @vals) = @_; | |
344 | + @vals = map{$_->{sub}}@vals; | |
345 | + if($cidr){ | |
346 | + sub{ | |
347 | + my ($r) = @_; | |
348 | + map{ | |
349 | + join("", | |
350 | + (map{$_->($r)}@vals), | |
351 | + $_->[0], | |
352 | + chr($_->[1]), | |
353 | + ) | |
354 | + }$r->{cidr}->@*; | |
355 | + }; | |
356 | + }else{ | |
357 | + sub{ | |
358 | + my ($r) = @_; | |
359 | + join("", | |
360 | + (map{$_->($r)}@vals), | |
361 | + $r->{start}, | |
362 | + binadd($r->{start}, bindec($r->{length})), | |
363 | + ); | |
364 | + }; | |
365 | + } | |
366 | +} | |
367 | +sub get_lf_appended{ | |
368 | + my ($inner) = @_; | |
369 | + sub{ map{"$_\n"}$inner->(@_) }; | |
370 | +} | |
371 | +my %region_formatter = ( | |
372 | + sub => sub{$_[0]->{region}}, | |
373 | + len => 2, | |
374 | +); | |
375 | +my %formatters = ( | |
376 | + "cidr.txt" => sub{ | |
377 | + get_lf_appended(get_ascii_formatter($_[0], 1, \%region_formatter)); | |
378 | + }, | |
379 | + "txt" => sub{ | |
380 | + get_lf_appended(get_ascii_formatter($_[0], 0, \%region_formatter)); | |
381 | + }, | |
382 | + "cidr.pad.txt" => sub{ | |
383 | + get_lf_appended(get_pad_ascii_formatter($_[0], 1, \%region_formatter)); | |
384 | + }, | |
385 | + "pad.txt" => sub{ | |
386 | + get_lf_appended(get_pad_ascii_formatter($_[0], 0, \%region_formatter)); | |
387 | + }, | |
388 | + "cidr.bin" => sub{ | |
389 | + get_binary_formatter($_[0], 1, \%region_formatter); | |
390 | + }, | |
391 | + "bin" => sub{ | |
392 | + get_binary_formatter($_[0], 0, \%region_formatter); | |
393 | + }, | |
394 | +); | |
395 | +sub write_it{ | |
396 | + my ($ranges, $file, $formatter) = @_; | |
397 | + open(my $wh, ">:raw", $file) or mydie(4, $!); | |
398 | + for(@$ranges){ | |
399 | + $wh->print($formatter->($_)); | |
400 | + } | |
401 | + $wh->close or mydie(4, $!); | |
402 | +} | |
403 | +sub gen_cidr_inplace{ | |
404 | + my ($arr) = @_; | |
405 | + my $al = length $arr->[0]{start}; | |
406 | + for my $r(@$arr){ | |
407 | + next if($r->{cidr}); | |
408 | + $r->{cidr} = []; | |
409 | + my $p = $r->{start}; | |
410 | + my $end = binadd($p, $r->{length}); | |
411 | + while($p ne $end){ | |
412 | + for(reverse 0 .. cntr0($p)){ | |
413 | + my $step = $bitmap0[$_%8] . "\x{00}" x int($_/8); | |
414 | + my $next = binadd($p, $step); | |
415 | + next if(length $next > $al); | |
416 | + next if($next gt $end); | |
417 | + push $r->{cidr}->@*, [$p, $al*8-$_]; | |
418 | + $p = $next; | |
419 | + last; | |
420 | + } | |
421 | + } | |
422 | + } | |
423 | +} | |
222 | 424 | sub do_load_files{ |
223 | - my %ret = (ipv4 => [], ipv6 => [], asn => []); | |
425 | + my %ret; | |
426 | + $ret{asn} = [] if($asn); | |
427 | + $ret{ipv4} = [] if($ipv4); | |
428 | + $ret{ipv6} = [] if($ipv6); | |
224 | 429 | for my $src(src_spec()->@*){ |
225 | 430 | my $fname = orig_name($src->{name}); |
226 | 431 | if(!-e $fname){ |
@@ -255,23 +460,6 @@ sub do_load_files{ | ||
255 | 460 | } |
256 | 461 | %ret; |
257 | 462 | } |
258 | -sub sort_by_keys{ | |
259 | - my ($arr, @k) = @_; | |
260 | - sort{reduce{$a || $b}map{$a->{$_} cmp $b->{$_}}@k}@$arr; | |
261 | -} | |
262 | -sub uniq_check{ | |
263 | - my ($arr) = @_; | |
264 | - my @ret; | |
265 | - my $end = "\x{0}" x length $arr->[0]{start}; | |
266 | - for($arr->@*){ | |
267 | - if($end gt $_->{start}){ | |
268 | - push @ret, $_; | |
269 | - } | |
270 | - my $new_end = binadd($_->{start}, $_->{length}); | |
271 | - $end = $new_end if($new_end gt $end); | |
272 | - } | |
273 | - @ret; | |
274 | -} | |
275 | 463 | |
276 | 464 | if($op eq "generate"){ |
277 | 465 | if($download){ |
@@ -283,11 +471,22 @@ if($op eq "generate"){ | ||
283 | 471 | } |
284 | 472 | my %reglist = do_load_files(); |
285 | 473 | for my $k(keys %reglist){ |
286 | - #$reglist{$k}->@* = grep{$_->{region} ne "ZZ"}$reglist{$k}->@*; | |
287 | - $reglist{$k}->@* = sort_by_keys($reglist{$k}, "start"); | |
474 | + $reglist{$k}->@* = grep{$_->{region} ne "ZZ"}$reglist{$k}->@*; | |
475 | + $reglist{$k}->@* = sort_by($reglist{$k}, qw(start)); | |
288 | 476 | for(uniq_check($reglist{$k})){ |
289 | 477 | say "Overwrapping $k: ", $addr2ascii{$k}->($_->{start}); |
290 | 478 | } |
479 | + | |
480 | + gen_cidr_inplace($reglist{$k}); | |
481 | + my @merged = merge_by($reglist{$k}, qw(region)); | |
482 | + gen_cidr_inplace(\@merged); | |
483 | + my @regionsort = sort_by(\@merged, qw(region start)); | |
484 | + | |
485 | + write_it( | |
486 | + \@regionsort, | |
487 | + "$k-merged-region.cidr.txt", | |
488 | + $formatters{"cidr.txt"}->($k) | |
489 | + ); | |
291 | 490 | } |
292 | 491 | }elsif($op eq "head-all"){ |
293 | 492 | if(!defined $src_spec){ |
@@ -324,12 +523,12 @@ ip2cc_indexgen.pl - Generate index file to lookup country code from ip address | ||
324 | 523 | =item ip2cc_indexgen.pl |
325 | 524 | |
326 | 525 | [--help] [--asn] [--no-ipv4] [--no-ipv6] |
327 | - [--src-spec=F<src_spec.json>] | |
328 | - [--orig-rotate=I<number>] | |
329 | - [--orig-prefix=I<prefix>] | |
330 | - [--prefix=I<output>] | |
331 | - [--sort=B<sort key>,...] | |
332 | - [I<OPERATION>] | |
526 | + [--src-spec=src_spec.json] | |
527 | + [--orig-rotate=number] | |
528 | + [--orig-prefix=prefix] | |
529 | + [--prefix=output] | |
530 | + [--cidr] | |
531 | + [OPERATION] | |
333 | 532 | |
334 | 533 | =back |
335 | 534 |