|
import os
import re
import shutil
from urllib.parse import urlparse

import yaml
| 6 | + |
def load_config():
    """Read and parse the URL-to-directory mapping configuration.

    The file ``.github/url_dir_map_config.yml`` is opened relative to the
    current working directory and parsed with ``yaml.safe_load``.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file. Callers in this
        script index it with ``'url_mappings'``.
    """
    config_file = '.github/url_dir_map_config.yml'
    with open(config_file, 'r', encoding='utf-8') as stream:
        parsed = yaml.safe_load(stream)
    return parsed
| 10 | + |
def extract_original_url(content):
    """Return the URL stored in a ``tcd_original_link`` HTML comment.

    The first ``<!-- tcd_original_link <value> -->`` comment found in
    *content* wins; whitespace around the value is not included.

    Args:
        content: Text to scan for the marker comment.

    Returns:
        The captured value, or ``None`` when no marker comment is present.
    """
    marker = re.compile(r'<!--\s*tcd_original_link\s+(.*?)\s*-->')
    found = marker.search(content)
    if found is None:
        return None
    return found.group(1)
| 14 | + |
def get_target_dir(url, config):
    """Pick the directory name that a document from *url* belongs in.

    Args:
        url: Original link of the document, or a falsy value when unknown.
        config: Mapping with a ``'url_mappings'`` list; each entry provides
            a ``'domain'`` string and the ``'dir'`` to use for it.

    Returns:
        The ``'dir'`` of the first mapping whose domain occurs in the URL's
        network location, or ``"未分类"`` when the URL is missing, cannot be
        parsed, or matches no mapping.
    """
    fallback_dir = "未分类"
    if not url:
        return fallback_dir

    try:
        # Host names are case-insensitive, so compare in lower case.
        domain = urlparse(url).netloc.lower()
    except ValueError:
        # urlparse rejects malformed bracketed hosts (e.g. "http://[bad");
        # one bad link should not abort the whole run.
        return fallback_dir

    # NOTE(review): this is a substring test, so "example.com" also matches
    # "notexample.com" -- kept for backward compatibility; confirm intent.
    for mapping in config['url_mappings']:
        if mapping['domain'].lower() in domain:
            return mapping['dir']
    return fallback_dir
| 24 | + |
def get_unique_filename(target_path, filename):
    """Build a path inside *target_path* that does not exist yet.

    ``filename`` is used unchanged when it is free. Otherwise ``_1``,
    ``_2``, ... is inserted before the extension until an unused path is
    found.

    Args:
        target_path: Directory the file is going to be placed in.
        filename: Desired file name, including its extension.

    Returns:
        The first candidate path for which ``os.path.exists`` is false.
    """
    candidate = os.path.join(target_path, filename)
    stem, suffix = os.path.splitext(filename)

    attempt = 0
    while os.path.exists(candidate):
        attempt += 1
        candidate = os.path.join(target_path, f"{stem}_{attempt}{suffix}")

    return candidate
| 36 | + |
def _is_within(directory, parent):
    """Return True when *directory* is *parent* or lies somewhere below it."""
    return directory == parent or directory.startswith(parent + os.sep)


def main():
    """Sort the markdown files under ``workspace`` into mapped directories.

    Every ``.md`` file found below ``workspace`` is read, its original link
    is extracted, and the file is moved into the directory that
    ``get_target_dir`` selects for that link. Name clashes in the target
    directory are resolved by ``get_unique_filename``.

    Files are left untouched when they already live inside one of the
    configured mapping directories, or when they already sit directly in
    the directory they would be moved to. The second rule keeps files in
    the fallback directory from being renamed again on every run.
    """
    config = load_config()
    workspace_dir = 'workspace'
    mappings = config['url_mappings']

    # Create target directories if they don't exist
    for mapping in mappings:
        os.makedirs(os.path.join(workspace_dir, mapping['dir']), exist_ok=True)

    # Compare whole path components rather than raw substrings, so a file
    # such as "workspace/myblog.md" is not mistaken for living in "blog".
    mapped_dirs = [
        os.path.abspath(os.path.join(workspace_dir, mapping['dir']))
        for mapping in mappings
    ]

    # Process markdown files
    for root, _, files in os.walk(workspace_dir):
        current_dir = os.path.abspath(root)

        # Skip files that are already in target directories
        if any(_is_within(current_dir, mapped) for mapped in mapped_dirs):
            continue

        for file in files:
            if not file.endswith('.md'):
                continue

            file_path = os.path.join(root, file)

            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            url = extract_original_url(content)
            target_dir = get_target_dir(url, config)
            target_dir_path = os.path.join(workspace_dir, target_dir)

            # Already filed where it belongs (typically the fallback
            # directory); moving it onto itself would only rename it.
            if os.path.abspath(target_dir_path) == current_dir:
                continue

            # Create target directory if it doesn't exist
            os.makedirs(target_dir_path, exist_ok=True)

            # Get unique filename in target directory
            new_file_path = get_unique_filename(target_dir_path, file)

            # Move file
            shutil.move(file_path, new_file_path)
            print(f"Moved {file} to {target_dir}")
| 74 | + |
# Run the sorter only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments