Header 1

ioan

ioan biticu's website

edit a translation file in blocks

I have a script that lets you filter certain blocks of a file (where a block is a chunk of text separated by two consecutive newlines) and put those blocks into a separate file. You can then work on the newly created file and merge it back into the original.

This can be very useful if you have a PO file that is thousands of lines long, but you only need to edit 20 blocks of it. Once you have the blocks, you can easily pass them over to an AI to do the job for you, instead of waiting for the whole context of the original file to be written back to you.

Here is how it works:

# filter by msgstr ""
python tool.py --action split --input messages.po --output filtered.po --filter "msgstr \"\"" 

# merge the file back into a new file merged.po
python tool.py --action merge --input filtered.po --output merged.po --original messages.po

# override the original
cp merged.po messages.po

And here is the script:

#!/usr/bin/env python3
import os
import json
import argparse

class FileBlockProcessor:
    """Split a text file into delimiter-separated blocks, extract the ones
    matching a filter criterion, and merge edited blocks back in place.

    Typical workflow (e.g. for gettext .po files):
      1. ``split_file()`` writes only the matching blocks to a new file plus
         a ``<output>.metadata`` JSON sidecar recording their original
         positions.
      2. The extracted file is edited (by hand or by an AI assistant).
      3. ``merge_file()`` splices the edited blocks back into their original
         positions using the sidecar.
    """

    def __init__(self, delimiter="\n\n", filter_criterion="msgstr \"\""):
        # Block separator expressed with '\n'; it is re-normalised to the
        # file's actual line ending (LF or CRLF) when a file is processed.
        self.delimiter = delimiter
        # A block is extracted when this substring occurs anywhere inside it.
        self.filter_criterion = filter_criterion
        self.block_map = {}  # Maps indices to original blocks (populated by split_file)
        self.filtered_indices = []  # Indices of the blocks that matched the filter
        self.line_ending = None  # '\n' or '\r\n', detected from the input file

    def detect_line_ending(self, content):
        """Return the line ending used in *content*: CRLF if any is present,
        otherwise LF (Unix/Linux/modern macOS)."""
        return '\r\n' if '\r\n' in content else '\n'

    def split_file(self, input_file, output_file):
        """Extract every block of *input_file* containing the filter criterion.

        Writes the matching blocks to *output_file* and a JSON metadata
        sidecar (``<output_file>.metadata``) that merge_file() needs later.
        Returns True on success, False on any error (which is printed).
        """
        try:
            # Resolve symbolic links so we operate on the real files.
            real_input_file = os.path.realpath(input_file)

            # Binary read + explicit decode keeps the original line endings
            # intact (text mode would translate them).
            with open(real_input_file, 'rb') as f:
                content = f.read().decode('utf-8')

            # Detect and remember the file's line-ending convention.
            self.line_ending = self.detect_line_ending(content)

            # Express the delimiter in the file's own line-ending style.
            delimiter = self.delimiter.replace('\n', self.line_ending)
            blocks = content.split(delimiter)

            # Remember every block by position for later inspection.
            self.block_map = dict(enumerate(blocks))

            # Record which blocks matched, preserving original order.
            self.filtered_indices = [
                i for i, block in enumerate(blocks)
                if self.filter_criterion in block
            ]
            filtered_blocks = [blocks[i] for i in self.filtered_indices]

            # Resolve output path (realpath also makes it absolute, so
            # dirname below is never empty).
            real_output_file = os.path.realpath(output_file)
            os.makedirs(os.path.dirname(real_output_file), exist_ok=True)

            # Binary write preserves the detected line endings verbatim.
            with open(real_output_file, 'wb') as f:
                f.write(delimiter.join(filtered_blocks).encode('utf-8'))

            # Save the metadata merge_file() needs to restore positions.
            metadata_file = f"{real_output_file}.metadata"
            with open(metadata_file, 'w', encoding='utf-8') as f:
                json.dump({
                    "delimiter": self.delimiter,
                    "filtered_indices": self.filtered_indices,
                    "total_blocks": len(blocks),
                    "line_ending": "CRLF" if self.line_ending == '\r\n' else "LF",
                    "original_file": input_file,  # path as given (may be a symlink)
                    "real_input_file": real_input_file  # fully resolved path
                }, f)

            print(f"Split file successfully. {len(filtered_blocks)} blocks extracted to {output_file}")
            print(f"Metadata saved to {metadata_file}")

            return True

        except Exception as e:
            print(f"Error splitting file: {e}")
            return False

    def merge_file(self, original_file, edited_file, output_file):
        """Splice the blocks of *edited_file* back into *original_file*.

        Requires the ``<edited_file>.metadata`` sidecar written by
        split_file(). The merged result is written to *output_file*, which
        may safely be the original file itself. Returns True on success.

        Bug fix: the previous implementation reconstructed the content from
        ``self.block_map`` when output == original, but block_map is empty
        when merge runs in a fresh process (the normal CLI flow: split and
        merge are separate invocations), which silently replaced every block
        with "" and destroyed the file. We now always read the original
        content fully into memory *before* opening the output for writing,
        which is correct even when both paths are the same file.
        """
        try:
            # Resolve symbolic links to get real paths.
            real_original_file = os.path.realpath(original_file)
            real_edited_file = os.path.realpath(edited_file)
            real_output_file = os.path.realpath(output_file)

            # The metadata sidecar is mandatory: it holds the block positions.
            metadata_file = f"{real_edited_file}.metadata"
            if not os.path.exists(metadata_file):
                raise FileNotFoundError(f"Metadata file {metadata_file} not found. Cannot merge without metadata.")

            with open(metadata_file, 'r', encoding='utf-8') as f:
                metadata = json.load(f)

            # Restore the line-ending convention recorded at split time.
            self.line_ending = '\r\n' if metadata.get("line_ending") == "CRLF" else '\n'

            # Binary read preserves the edited file's line endings.
            with open(real_edited_file, 'rb') as f:
                edited_content = f.read().decode('utf-8')

            # Normalise the delimiter to the recorded line ending.
            delimiter = metadata["delimiter"].replace('\n', self.line_ending)
            edited_blocks = edited_content.split(delimiter)

            # Refuse to merge if blocks were added or removed during editing:
            # positions would no longer line up with the metadata.
            if len(edited_blocks) != len(metadata["filtered_indices"]):
                raise ValueError(f"Number of edited blocks ({len(edited_blocks)}) doesn't match original filtered blocks ({len(metadata['filtered_indices'])})")

            # Read the original fully before any writing (see docstring).
            with open(real_original_file, 'rb') as f:
                original_content = f.read().decode('utf-8')
            blocks = original_content.split(delimiter)

            # Replace the filtered blocks with their edited versions; the
            # bounds check guards against a shrunken original file.
            for idx, original_idx in enumerate(metadata["filtered_indices"]):
                if original_idx < len(blocks):
                    blocks[original_idx] = edited_blocks[idx]

            os.makedirs(os.path.dirname(real_output_file), exist_ok=True)

            # Binary write preserves the line endings verbatim.
            with open(real_output_file, 'wb') as f:
                f.write(delimiter.join(blocks).encode('utf-8'))

            print(f"Merged file successfully created at {output_file}")
            return True

        except Exception as e:
            print(f"Error merging file: {e}")
            return False


def main():
    """Command-line entry point: parse arguments and dispatch to split/merge.

    Bug fix: the missing ``--original`` error was printed to stdout and the
    process exited with status 0, so shell scripts could not detect the
    misuse. ``parser.error`` prints the message to stderr (with usage) and
    exits with status 2, the conventional argparse behaviour.
    """
    parser = argparse.ArgumentParser(description='Process files by splitting into blocks, filtering, and merging')
    parser.add_argument('--action', required=True, choices=['split', 'merge'], help='Action to perform')
    parser.add_argument('--input', required=True, help='Input file path')
    parser.add_argument('--output', required=True, help='Output file path')
    parser.add_argument('--delimiter', default='\n\n', help='Block delimiter (default: two newlines)')
    parser.add_argument('--filter', default='msgstr ""', help='Filter criterion (default: msgstr "")')
    parser.add_argument('--original', help='Original file path (required for merge action)')
    # NOTE(review): this flag is inert — 'store_true' with default=True means
    # it can never be turned off, and FileBlockProcessor always resolves
    # symlinks via os.path.realpath regardless. Kept so existing invocations
    # that pass --follow-symlinks keep working.
    parser.add_argument('--follow-symlinks', action='store_true', default=True,
                        help='Follow symbolic links (always on; kept for compatibility)')

    args = parser.parse_args()

    # Validate cross-argument requirements before doing any work.
    if args.action == 'merge' and not args.original:
        parser.error("--original is required for merge action")

    processor = FileBlockProcessor(delimiter=args.delimiter, filter_criterion=args.filter)

    if args.action == 'split':
        processor.split_file(args.input, args.output)
    else:  # 'merge' — argparse choices guarantee it is one of the two
        processor.merge_file(args.original, args.input, args.output)


# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

nginx-proxy limit connections to cloudflare

Here is how to limit the IPs that can connect to your nginx-proxy container.

The IPs are available on Cloudflare's website and they rarely change.

What I'm going to show below works as of Sep 28, 2023.

File: ./conf/cloudflare-ip.conf

# Cloudflare IP configuration (http level)
# Sets $cloudflare_ip to 1 when the connecting peer's address falls inside
# one of Cloudflare's published ranges, 0 otherwise. Matching uses
# $realip_remote_addr — the original TCP peer address, which the realip
# module does not rewrite — so a forged CF-Connecting-IP header cannot
# make a direct connection look like it came from Cloudflare.
geo $realip_remote_addr $cloudflare_ip {
    default 0;
    
    # IPv4 ranges
    173.245.48.0/20 1;
    103.21.244.0/22 1;
    103.22.200.0/22 1;
    103.31.4.0/22 1;
    141.101.64.0/18 1;
    108.162.192.0/18 1;
    190.93.240.0/20 1;
    188.114.96.0/20 1;
    197.234.240.0/22 1;
    198.41.128.0/17 1;
    162.158.0.0/15 1;
    104.16.0.0/13 1;
    104.24.0.0/14 1;
    172.64.0.0/13 1;
    131.0.72.0/22 1;
    
    # IPv6 ranges
    2400:cb00::/32 1;
    2606:4700::/32 1;
    2803:f800::/32 1;
    2405:b500::/32 1;
    2405:8100::/32 1;
    2a06:98c0::/29 1;
    2c0f:f248::/32 1;
}

# Mirror the Cloudflare flag into a second variable.
# NOTE(review): $allowed_ip is defined here but never referenced in the
# snippets below (the vhost check uses $cloudflare_ip directly) — confirm
# it is used elsewhere, otherwise this map can be dropped.
map $cloudflare_ip $allowed_ip {
    0 0;
    1 1;
}

File: ./conf/cloudflare.conf

# Cloudflare IP ranges
# Tell the realip module to trust these peers: when a request arrives from
# one of them, $remote_addr is replaced with the visitor address taken from
# the header named by real_ip_header below, so logs and access rules see
# the real client IP instead of Cloudflare's edge IP.
# IPv4
set_real_ip_from 173.245.48.0/20;
set_real_ip_from 103.21.244.0/22;
set_real_ip_from 103.22.200.0/22;
set_real_ip_from 103.31.4.0/22;
set_real_ip_from 141.101.64.0/18;
set_real_ip_from 108.162.192.0/18;
set_real_ip_from 190.93.240.0/20;
set_real_ip_from 188.114.96.0/20;
set_real_ip_from 197.234.240.0/22;
set_real_ip_from 198.41.128.0/17;
set_real_ip_from 162.158.0.0/15;
set_real_ip_from 104.16.0.0/13;
set_real_ip_from 104.24.0.0/14;
set_real_ip_from 172.64.0.0/13;
set_real_ip_from 131.0.72.0/22;

# IPv6
set_real_ip_from 2400:cb00::/32;
set_real_ip_from 2606:4700::/32;
set_real_ip_from 2803:f800::/32;
set_real_ip_from 2405:b500::/32;
set_real_ip_from 2405:8100::/32;
set_real_ip_from 2a06:98c0::/29;
set_real_ip_from 2c0f:f248::/32;

# Use CF-Connecting-IP header
# Cloudflare puts the original visitor IP in this header on every request.
real_ip_header CF-Connecting-IP;

File: ./vhost.d/default

# Block access if not from Cloudflare
# $cloudflare_ip is set by the geo block in conf/cloudflare-ip.conf; any
# connection whose TCP peer is outside Cloudflare's ranges gets a 403,
# which stops visitors from bypassing Cloudflare by hitting the origin
# IP directly.
if ($cloudflare_ip = 0) {
    return 403 "Access denied: Direct IP access not allowed";
}

File: docker-compose.yml

  nginx-proxy:
    image: nginxproxy/nginx-proxy:alpine
    container_name: nginx-proxy
    ports:
      - "80:80"
      - "443:443"
      # NOTE(review): 443/udp is presumably exposed for HTTP/3 (QUIC) —
      # confirm the nginx build in this image actually enables it.
      - "443:443/udp"
    volumes:
      # Docker socket (read-only) lets nginx-proxy discover containers.
      - /var/run/docker.sock:/tmp/docker.sock:ro
      # The Cloudflare allow-list configuration described in this post.
      - ./conf/cloudflare.conf:/etc/nginx/conf.d/cloudflare.conf:ro
      - ./conf/cloudflare-ip.conf:/etc/nginx/conf.d/cloudflare-ip.conf:ro
      # NOTE(review): this single-file bind targets a path inside the
      # vhost2 named volume mounted below; the more specific mount shadows
      # that one file — looks intended, but worth confirming.
      - ./vhost.d/default:/etc/nginx/vhost.d/default:ro
      - certs:/etc/nginx/certs
      - vhost2:/etc/nginx/vhost.d
      - html:/usr/share/nginx/html
    environment: # for debugging purposes, feel free to remove
    # Custom access-log format that also prints the CF-Connecting-IP header
    # and the $cloudflare_ip flag, useful to verify the allow-list works.
    # ($$ escapes $ so docker-compose does not interpolate the variables.)
    - |
      LOG_FORMAT=$${remote_addr} - $${remote_user} [$${time_local}] 
      "$${request}" $${status} $${body_bytes_sent} 
      "$${http_referer}" "$${http_user_agent}"
      CF-IP:$${http_cf_connecting_ip} 
      Cloudflare:$${cloudflare_ip}
    restart: always
    networks:
      - proxy-network
    labels:
      - "com.github.jrcs.letsencrypt_nginx_proxy_companion.nginx_proxy=true"